diff options
755 files changed, 22547 insertions, 7418 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable new file mode 100644 index 000000000000..175bb4f70512 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable @@ -0,0 +1,18 @@ +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X +Date: August 2008 +KernelVersion: 2.6.27 +Contact: mark.langsdorf@amd.com +Description: These files exist in every cpu's cache index directories. + There are currently 2 cache_disable_# files in each + directory. Reading from these files on a supported + processor will return that cache disable index value + for that processor and node. Writing to one of these + files will cause the specificed cache index to be disabled. + + Currently, only AMD Family 10h Processors support cache index + disable, and only for their L3 caches. See the BIOS and + Kernel Developer's Guide at + http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf + for formatting information and other details on the + cache index disable. +Users: joachim.deguara@amd.com diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab new file mode 100644 index 000000000000..6dcf75e594fb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-slab @@ -0,0 +1,479 @@ +What: /sys/kernel/slab +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The /sys/kernel/slab directory contains a snapshot of the + internal state of the SLUB allocator for each cache. Certain + files may be modified to change the behavior of the cache (and + any cache it aliases, if any). +Users: kernel memory tuning tools + +What: /sys/kernel/slab/cache/aliases +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The aliases file is read-only and specifies how many caches + have merged into this cache. + +What: /sys/kernel/slab/cache/align +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The align file is read-only and specifies the cache's object + alignment in bytes. + +What: /sys/kernel/slab/cache/alloc_calls +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_calls file is read-only and lists the kernel code + locations from which allocations for this cache were performed. + The alloc_calls file only contains information if debugging is + enabled for that cache (see Documentation/vm/slub.txt). + +What: /sys/kernel/slab/cache/alloc_fastpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_fastpath file is read-only and specifies how many + objects have been allocated using the fast path. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_from_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_from_partial file is read-only and specifies how + many times a cpu slab has been full and it has been refilled + by using a slab from the list of partially used slabs. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_refill +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_refill file is read-only and specifies how many + times the per-cpu freelist was empty but there were objects + available as the result of remote cpu frees. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_slab +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_slab file is read-only and specifies how many times + a new slab had to be allocated from the page allocator. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_slowpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The alloc_slowpath file is read-only and specifies how many + objects have been allocated using the slow path because of a + refill or allocation from a partial or new slab. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/cache_dma +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The cache_dma file is read-only and specifies whether objects + are from ZONE_DMA. + Available when CONFIG_ZONE_DMA is enabled. + +What: /sys/kernel/slab/cache/cpu_slabs +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The cpu_slabs file is read-only and displays how many cpu slabs + are active and their NUMA locality. + +What: /sys/kernel/slab/cache/cpuslab_flush +Date: April 2009 +KernelVersion: 2.6.31 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file cpuslab_flush is read-only and specifies how many + times a cache's cpu slabs have been flushed as the result of + destroying or shrinking a cache, a cpu going offline, or as + the result of forcing an allocation from a certain node. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/ctor +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The ctor file is read-only and specifies the cache's object + constructor function, which is invoked for each object when a + new slab is allocated. + +What: /sys/kernel/slab/cache/deactivate_empty +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file deactivate_empty is read-only and specifies how many + times an empty cpu slab was deactivated. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_full +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file deactivate_full is read-only and specifies how many + times a full cpu slab was deactivated. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_remote_frees +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file deactivate_remote_frees is read-only and specifies how + many times a cpu slab has been deactivated and contained free + objects that were freed remotely. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_to_head +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file deactivate_to_head is read-only and specifies how + many times a partial cpu slab was deactivated and added to the + head of its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_to_tail +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file deactivate_to_tail is read-only and specifies how + many times a partial cpu slab was deactivated and added to the + tail of its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/destroy_by_rcu +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The destroy_by_rcu file is read-only and specifies whether + slabs (not objects) are freed by rcu. + +What: /sys/kernel/slab/cache/free_add_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file free_add_partial is read-only and specifies how many + times an object has been freed in a full slab so that it had to + added to its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_calls +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The free_calls file is read-only and lists the locations of + object frees if slab debugging is enabled (see + Documentation/vm/slub.txt). + +What: /sys/kernel/slab/cache/free_fastpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The free_fastpath file is read-only and specifies how many + objects have been freed using the fast path because it was an + object from the cpu slab. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_frozen +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The free_frozen file is read-only and specifies how many + objects have been freed to a frozen slab (i.e. a remote cpu + slab). + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_remove_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file free_remove_partial is read-only and specifies how + many times an object has been freed to a now-empty slab so + that it had to be removed from its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_slab +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The free_slab file is read-only and specifies how many times an + empty slab has been freed back to the page allocator. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_slowpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The free_slowpath file is read-only and specifies how many + objects have been freed using the slow path (i.e. to a full or + partial slab). + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/hwcache_align +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The hwcache_align file is read-only and specifies whether + objects are aligned on cachelines. + +What: /sys/kernel/slab/cache/min_partial +Date: February 2009 +KernelVersion: 2.6.30 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + David Rientjes <rientjes@google.com> +Description: + The min_partial file specifies how many empty slabs shall + remain on a node's partial list to avoid the overhead of + allocating new slabs. Such slabs may be reclaimed by utilizing + the shrink file. + +What: /sys/kernel/slab/cache/object_size +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The object_size file is read-only and specifies the cache's + object size. + +What: /sys/kernel/slab/cache/objects +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The objects file is read-only and displays how many objects are + active and from which nodes they are from. + +What: /sys/kernel/slab/cache/objects_partial +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The objects_partial file is read-only and displays how many + objects are on partial slabs and from which nodes they are + from. + +What: /sys/kernel/slab/cache/objs_per_slab +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file objs_per_slab is read-only and specifies how many + objects may be allocated from a single slab of the order + specified in /sys/kernel/slab/cache/order. + +What: /sys/kernel/slab/cache/order +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The order file specifies the page order at which new slabs are + allocated. It is writable and can be changed to increase the + number of objects per slab. If a slab cannot be allocated + because of fragmentation, SLUB will retry with the minimum order + possible depending on its characteristics. + +What: /sys/kernel/slab/cache/order_fallback +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file order_fallback is read-only and specifies how many + times an allocation of a new slab has not been possible at the + cache's order and instead fallen back to its minimum possible + order. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/partial +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The partial file is read-only and displays how long many + partial slabs there are and how long each node's list is. + +What: /sys/kernel/slab/cache/poison +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The poison file specifies whether objects should be poisoned + when a new slab is allocated. + +What: /sys/kernel/slab/cache/reclaim_account +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The reclaim_account file specifies whether the cache's objects + are reclaimable (and grouped by their mobility). + +What: /sys/kernel/slab/cache/red_zone +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The red_zone file specifies whether the cache's objects are red + zoned. + +What: /sys/kernel/slab/cache/remote_node_defrag_ratio +Date: January 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The file remote_node_defrag_ratio specifies the percentage of + times SLUB will attempt to refill the cpu slab with a partial + slab from a remote node as opposed to allocating a new slab on + the local node. This reduces the amount of wasted memory over + the entire system but can be expensive. + Available when CONFIG_NUMA is enabled. + +What: /sys/kernel/slab/cache/sanity_checks +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The sanity_checks file specifies whether expensive checks + should be performed on free and, at minimum, enables double free + checks. Caches that enable sanity_checks cannot be merged with + caches that do not. + +What: /sys/kernel/slab/cache/shrink +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The shrink file is written when memory should be reclaimed from + a cache. Empty partial slabs are freed and the partial list is + sorted so the slabs with the fewest available objects are used + first. + +What: /sys/kernel/slab/cache/slab_size +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The slab_size file is read-only and specifies the object size + with metadata (debugging information and alignment) in bytes. + +What: /sys/kernel/slab/cache/slabs +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The slabs file is read-only and displays how long many slabs + there are (both cpu and partial) and from which nodes they are + from. + +What: /sys/kernel/slab/cache/store_user +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The store_user file specifies whether the location of + allocation or free should be tracked for a cache. + +What: /sys/kernel/slab/cache/total_objects +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The total_objects file is read-only and displays how many total + objects a cache has and from which nodes they are from. + +What: /sys/kernel/slab/cache/trace +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + The trace file specifies whether object allocations and frees + should be traced. + +What: /sys/kernel/slab/cache/validate +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg <penberg@cs.helsinki.fi>, + Christoph Lameter <cl@linux-foundation.org> +Description: + Writing to the validate file causes SLUB to traverse all of its + cache's objects and check the validity of metadata. diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index d9aa43d78bcc..25fb8bcf32a2 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -704,12 +704,24 @@ this directory the following files can currently be found: The current number of free dma_debug_entries in the allocator. + dma-api/driver-filter + You can write a name of a driver into this file + to limit the debug output to requests from that + particular driver. Write an empty string to + that file to disable the filter and see + all errors again. + If you have this code compiled into your kernel it will be enabled by default. If you want to boot without the bookkeeping anyway you can provide 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging. Notice that you can not enable it again at runtime. You have to reboot to do so. +If you want to see debug messages only for a special device driver you can +specify the dma_debug_driver=<drivername> parameter. This will enable the +driver filter at boot time. The debug code will only print errors for that +driver afterwards. This filter can be disabled or changed later using debugfs. + When the code disables itself at runtime this is most likely because it ran out of dma_debug_entries. These entries are preallocated at boot. The number of preallocated entries is defined per architecture. If it is too low for you diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 068848240a8b..02cced183b2d 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: rcu: - 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 - 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 - 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 - 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 - 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 - 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 - 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 - 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 +rcu: + 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 + 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 + 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 + 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10 + 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10 + 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10 + 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10 + 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10 rcu_bh: - 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 - 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 - 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 - 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 - 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 - 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10 + 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10 + 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10 + 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 The first section lists the rcu_data structures for rcu, the second for rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. @@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent o "qp" indicates that RCU still expects a quiescent state from this CPU. -o "rpfq" is the number of rcu_pending() calls on this CPU required - to induce this CPU to invoke force_quiescent_state(). - -o "rp" is low-order four hex digits of the count of how many times - rcu_pending() has been invoked on this CPU. - o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the scheduler or by irq. The number after the "/" is the interrupt @@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number of RCU callbacks is ready to invoke, then the remainder will be deferred. +There is also an rcu/rcudata.csv file with the same information in +comma-separated-variable spreadsheet format. + The output of "cat rcu/rcugp" looks as follows: @@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct For example, the first entry at the lowest level shows "^0", indicating that it corresponds to bit zero in the first entry at the middle level. + + +The output of "cat rcu/rcu_pending" looks as follows: + +rcu: + 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 + 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 + 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 + 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 + 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 + 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 + 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 + 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 +rcu_bh: + 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 + 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 + 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 + 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 + 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 + 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 + 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 + 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + +As always, this is once again split into "rcu" and "rcu_bh" portions. +The fields are as follows: + +o "np" is the number of times that __rcu_pending() has been invoked + for the corresponding flavor of RCU. + +o "qsp" is the number of times that the RCU was waiting for a + quiescent state from this CPU. + +o "cbr" is the number of times that this CPU had RCU callbacks + that had passed through a grace period, and were thus ready + to be invoked. + +o "cng" is the number of times that this CPU needed another + grace period while RCU was idle. + +o "gpc" is the number of times that an old grace period had + completed, but this CPU was not yet aware of it. + +o "gps" is the number of times that a new grace period had started, + but this CPU was not yet aware of it. + +o "nf" is the number of times that this CPU suspected that the + current grace period had run for too long, and thus needed to + be forced. + + Please note that "forcing" consists of sending resched IPIs + to holdout CPUs. If that CPU really still is in an old RCU + read-side critical section, then we really do have to wait for it. + The assumption behing "forcing" is that the CPU is not still in + an old RCU read-side critical section, but has not yet responded + for some other reason. + +o "nn" is the number of times that this CPU needed nothing. Alert + readers will note that the rcu "nn" number for a given CPU very + closely matches the rcu_bh "np" number for that same CPU. This + is due to short-circuit evaluation in rcu_pending(). diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index 222437efd75a..3015da0c6b2a 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt @@ -133,4 +133,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root. Author: Christoph Rohland <cr@sap.com>, 1.12.01 Updated: - Hugh Dickins <hugh@veritas.com>, 4 June 2007 + Hugh Dickins, 4 June 2007 diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt new file mode 100644 index 000000000000..9dc1ff4fd536 --- /dev/null +++ b/Documentation/futex-requeue-pi.txt @@ -0,0 +1,131 @@ +Futex Requeue PI +---------------- + +Requeueing of tasks from a non-PI futex to a PI futex requires +special handling in order to ensure the underlying rt_mutex is never +left without an owner if it has waiters; doing so would break the PI +boosting logic [see rt-mutex-desgin.txt] For the purposes of +brevity, this action will be referred to as "requeue_pi" throughout +this document. Priority inheritance is abbreviated throughout as +"PI". + +Motivation +---------- + +Without requeue_pi, the glibc implementation of +pthread_cond_broadcast() must resort to waking all the tasks waiting +on a pthread_condvar and letting them try to sort out which task +gets to run first in classic thundering-herd formation. An ideal +implementation would wake the highest-priority waiter, and leave the +rest to the natural wakeup inherent in unlocking the mutex +associated with the condvar. + +Consider the simplified glibc calls: + +/* caller must lock mutex */ +pthread_cond_wait(cond, mutex) +{ + lock(cond->__data.__lock); + unlock(mutex); + do { + unlock(cond->__data.__lock); + futex_wait(cond->__data.__futex); + lock(cond->__data.__lock); + } while(...) + unlock(cond->__data.__lock); + lock(mutex); +} + +pthread_cond_broadcast(cond) +{ + lock(cond->__data.__lock); + unlock(cond->__data.__lock); + futex_requeue(cond->data.__futex, cond->mutex); +} + +Once pthread_cond_broadcast() requeues the tasks, the cond->mutex +has waiters. Note that pthread_cond_wait() attempts to lock the +mutex only after it has returned to user space. This will leave the +underlying rt_mutex with waiters, and no owner, breaking the +previously mentioned PI-boosting algorithms. + +In order to support PI-aware pthread_condvar's, the kernel needs to +be able to requeue tasks to PI futexes. This support implies that +upon a successful futex_wait system call, the caller would return to +user space already holding the PI futex. The glibc implementation +would be modified as follows: + + +/* caller must lock mutex */ +pthread_cond_wait_pi(cond, mutex) +{ + lock(cond->__data.__lock); + unlock(mutex); + do { + unlock(cond->__data.__lock); + futex_wait_requeue_pi(cond->__data.__futex); + lock(cond->__data.__lock); + } while(...) + unlock(cond->__data.__lock); + /* the kernel acquired the the mutex for us */ +} + +pthread_cond_broadcast_pi(cond) +{ + lock(cond->__data.__lock); + unlock(cond->__data.__lock); + futex_requeue_pi(cond->data.__futex, cond->mutex); +} + +The actual glibc implementation will likely test for PI and make the +necessary changes inside the existing calls rather than creating new +calls for the PI cases. Similar changes are needed for +pthread_cond_timedwait() and pthread_cond_signal(). + +Implementation +-------------- + +In order to ensure the rt_mutex has an owner if it has waiters, it +is necessary for both the requeue code, as well as the waiting code, +to be able to acquire the rt_mutex before returning to user space. +The requeue code cannot simply wake the waiter and leave it to +acquire the rt_mutex as it would open a race window between the +requeue call returning to user space and the waiter waking and +starting to run. This is especially true in the uncontended case. + +The solution involves two new rt_mutex helper routines, +rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which +allow the requeue code to acquire an uncontended rt_mutex on behalf +of the waiter and to enqueue the waiter on a contended rt_mutex. +Two new system calls provide the kernel<->user interface to +requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI. + +FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait() +and pthread_cond_timedwait()) to block on the initial futex and wait +to be requeued to a PI-aware futex. The implementation is the +result of a high-speed collision between futex_wait() and +futex_lock_pi(), with some extra logic to check for the additional +wake-up scenarios. + +FUTEX_REQUEUE_CMP_PI is called by the waker +(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and +possibly wake the waiting tasks. Internally, this system call is +still handled by futex_requeue (by passing requeue_pi=1). Before +requeueing, futex_requeue() attempts to acquire the requeue target +PI futex on behalf of the top waiter. If it can, this waiter is +woken. futex_requeue() then proceeds to requeue the remaining +nr_wake+nr_requeue tasks to the PI futex, calling +rt_mutex_start_proxy_lock() prior to each requeue to prepare the +task as a waiter on the underlying rt_mutex. It is possible that +the lock can be acquired at this stage as well, if so, the next +waiter is woken to finish the acquisition of the lock. + +FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but +their sum is all that really matters. futex_requeue() will wake or +requeue up to nr_wake + nr_requeue tasks. It will wake only as many +tasks as it can acquire the lock for, which in the majority of cases +should be 0 as good programming practice dictates that the caller of +either pthread_cond_broadcast() or pthread_cond_signal() acquire the +mutex prior to making the call. FUTEX_REQUEUE_PI requires that +nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for +signal. diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index 2f10ce6a879f..004ee161721e 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -150,6 +150,11 @@ fan[1-*]_min Fan minimum value Unit: revolution/min (RPM) RW +fan[1-*]_max Fan maximum value + Unit: revolution/min (RPM) + Only rarely supported by the hardware. + RW + fan[1-*]_input Fan input value. Unit: revolution/min (RPM) RO @@ -390,6 +395,7 @@ OR in[0-*]_min_alarm in[0-*]_max_alarm fan[1-*]_min_alarm +fan[1-*]_max_alarm temp[1-*]_min_alarm temp[1-*]_max_alarm temp[1-*]_crit_alarm diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt index 9f09557aea39..a12ea3b586e6 100644 --- a/Documentation/input/multi-touch-protocol.txt +++ b/Documentation/input/multi-touch-protocol.txt @@ -18,8 +18,12 @@ Usage Anonymous finger details are sent sequentially as separate packets of ABS events. Only the ABS_MT events are recognized as part of a finger packet. The end of a packet is marked by calling the input_mt_sync() -function, which generates a SYN_MT_REPORT event. The end of multi-touch -transfer is marked by calling the usual input_sync() function. +function, which generates a SYN_MT_REPORT event. This instructs the +receiver to accept the data for the current finger and prepare to receive +another. The end of a multi-touch transfer is marked by calling the usual +input_sync() function. This instructs the receiver to act upon events +accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new +set of events/packets. A set of ABS_MT events with the desired properties is defined. The events are divided into categories, to allow for partial implementation. The @@ -27,11 +31,26 @@ minimum set consists of ABS_MT_TOUCH_MAJOR, ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which allows for multiple fingers to be tracked. If the device supports it, the ABS_MT_WIDTH_MAJOR may be used to provide the size of the approaching finger. Anisotropy and direction may be specified with -ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. Devices with -more granular information may specify general shapes as blobs, i.e., as a -sequence of rectangular shapes grouped together by an -ABS_MT_BLOB_ID. Finally, the ABS_MT_TOOL_TYPE may be used to specify -whether the touching tool is a finger or a pen or something else. +ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MINOR and ABS_MT_ORIENTATION. The +ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a +finger or a pen or something else. Devices with more granular information +may specify general shapes as blobs, i.e., as a sequence of rectangular +shapes grouped together by an ABS_MT_BLOB_ID. Finally, for the few devices +that currently support it, the ABS_MT_TRACKING_ID event may be used to +report finger tracking from hardware [5]. + +Here is what a minimal event sequence for a two-finger touch would look +like: + + ABS_MT_TOUCH_MAJOR + ABS_MT_POSITION_X + ABS_MT_POSITION_Y + SYN_MT_REPORT + ABS_MT_TOUCH_MAJOR + ABS_MT_POSITION_X + ABS_MT_POSITION_Y + SYN_MT_REPORT + SYN_REPORT Event Semantics @@ -44,24 +63,24 @@ ABS_MT_TOUCH_MAJOR The length of the major axis of the contact. The length should be given in surface units. If the surface has an X times Y resolution, the largest -possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal. +possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4]. ABS_MT_TOUCH_MINOR The length, in surface units, of the minor axis of the contact. If the -contact is circular, this event can be omitted. +contact is circular, this event can be omitted [4]. ABS_MT_WIDTH_MAJOR The length, in surface units, of the major axis of the approaching tool. This should be understood as the size of the tool itself. The orientation of the contact and the approaching tool are assumed to be the -same. +same [4]. ABS_MT_WIDTH_MINOR The length, in surface units, of the minor axis of the approaching -tool. Omit if circular. +tool. Omit if circular [4]. The above four values can be used to derive additional information about the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates @@ -70,14 +89,17 @@ different characteristic widths [1]. ABS_MT_ORIENTATION -The orientation of the ellipse. The value should describe half a revolution -clockwise around the touch center. The scale of the value is arbitrary, but -zero should be returned for an ellipse aligned along the Y axis of the -surface. As an example, an index finger placed straight onto the axis could -return zero orientation, something negative when twisted to the left, and -something positive when twisted to the right. This value can be omitted if -the touching object is circular, or if the information is not available in -the kernel driver. +The orientation of the ellipse. The value should describe a signed quarter +of a revolution clockwise around the touch center. The signed value range +is arbitrary, but zero should be returned for a finger aligned along the Y +axis of the surface, a negative value when finger is turned to the left, and +a positive value when finger turned to the right. When completely aligned with +the X axis, the range max should be returned. Orientation can be omitted +if the touching object is circular, or if the information is not available +in the kernel driver. Partial orientation support is possible if the device +can distinguish between the two axis, but not (uniquely) any values in +between. In such cases, the range of ABS_MT_ORIENTATION should be [0, 1] +[4]. ABS_MT_POSITION_X @@ -98,8 +120,35 @@ ABS_MT_BLOB_ID The BLOB_ID groups several packets together into one arbitrarily shaped contact. This is a low-level anonymous grouping, and should not be confused -with the high-level contactID, explained below. Most kernel drivers will -not have this capability, and can safely omit the event. +with the high-level trackingID [5]. Most kernel drivers will not have blob +capability, and can safely omit the event. + +ABS_MT_TRACKING_ID + +The TRACKING_ID identifies an initiated contact throughout its life cycle +[5]. There are currently only a few devices that support it, so this event +should normally be omitted. + + +Event Computation +----------------- + +The flora of different hardware unavoidably leads to some devices fitting +better to the MT protocol than others. To simplify and unify the mapping, +this section gives recipes for how to compute certain events. + +For devices reporting contacts as rectangular shapes, signed orientation +cannot be obtained. Assuming X and Y are the lengths of the sides of the +touching rectangle, here is a simple formula that retains the most +information possible: + + ABS_MT_TOUCH_MAJOR := max(X, Y) + ABS_MT_TOUCH_MINOR := min(X, Y) + ABS_MT_ORIENTATION := bool(X > Y) + +The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that +the device can distinguish between a finger along the Y axis (0) and a +finger along the X axis (1). Finger Tracking @@ -109,14 +158,18 @@ The kernel driver should generate an arbitrary enumeration of the set of anonymous contacts currently on the surface. The order in which the packets appear in the event stream is not important. -The process of finger tracking, i.e., to assign a unique contactID to each +The process of finger tracking, i.e., to assign a unique trackingID to each initiated contact on the surface, is left to user space; preferably the -multi-touch X driver [3]. In that driver, the contactID stays the same and +multi-touch X driver [3]. In that driver, the trackingID stays the same and unique until the contact vanishes (when the finger leaves the surface). The problem of assigning a set of anonymous fingers to a set of identified fingers is a euclidian bipartite matching problem at each event update, and relies on a sufficiently rapid update rate. +There are a few devices that support trackingID in hardware. User space can +make use of these native identifiers to reduce bandwidth and cpu usage. + + Notes ----- @@ -136,5 +189,7 @@ could be used to derive tilt. time of writing (April 2009), the MT protocol is not yet merged, and the prototype implements finger matching, basic mouse support and two-finger scrolling. The project aims at improving the quality of current multi-touch -functionality available in the synaptics X driver, and in addition +functionality available in the Synaptics X driver, and in addition implement more advanced gestures. +[4] See the section on event computation. +[5] See the section on finger tracking. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index fcd3bfbe74e8..4a3c2209a124 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -328,11 +328,6 @@ and is between 256 and 4096 characters. It is defined in the file flushed before they will be reused, which is a lot of faster - amd_iommu_size= [HW,X86-64] - Define the size of the aperture for the AMD IOMMU - driver. Possible values are: - '32M', '64M' (default), '128M', '256M', '512M', '1G' - amijoy.map= [HW,JOY] Amiga joystick support Map of devices attached to JOY0DAT and JOY1DAT Format: <a>,<b> @@ -645,6 +640,13 @@ and is between 256 and 4096 characters. It is defined in the file DMA-API debugging code disables itself because the architectural default is too low. + dma_debug_driver=<driver_name> + With this option the DMA-API debugging driver + filter feature can be enabled at boot time. Just + pass the driver to filter for as the parameter. + The filter can be disabled or changed to another + driver later using sysfs. + dscc4.setup= [NET] dtc3181e= [HW,SCSI] @@ -1538,6 +1540,10 @@ and is between 256 and 4096 characters. It is defined in the file register save and restore. The kernel will only save legacy floating-point registers on task switch. + noxsave [BUGS=X86] Disables x86 extended register state save + and restore using xsave. The kernel will fallback to + enabling legacy floating-point and sse state. + nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or wfi(ARM) instruction doesn't work correctly and not to use it. This is also useful when using JTAG debugger. @@ -1574,6 +1580,9 @@ and is between 256 and 4096 characters. It is defined in the file noinitrd [RAM] Tells the kernel not to load any configured initial RAM disk. + nointremap [X86-64, Intel-IOMMU] Do not enable interrupt + remapping. + nointroute [IA-64] nojitter [IA64] Disables jitter checking for ITC timers. @@ -1659,6 +1668,14 @@ and is between 256 and 4096 characters. It is defined in the file oprofile.timer= [HW] Use timer interrupt instead of performance counters + oprofile.cpu_type= Force an oprofile cpu type + This might be useful if you have an older oprofile + userland or if you want common events. + Format: { archperfmon } + archperfmon: [X86] Force use of architectural + perfmon on Intel CPUs instead of the + CPU specific event set. + osst= [HW,SCSI] SCSI Tape Driver Format: <buffer_size>,<write_threshold> See also Documentation/scsi/st.txt. diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f5b7127f54ac..7f5809eddee6 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -31,6 +31,7 @@ Contents: - Locking functions. - Interrupt disabling functions. + - Sleep and wake-up functions. - Miscellaneous functions. (*) Inter-CPU locking barrier effects. @@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some other means. +SLEEP AND WAKE-UP FUNCTIONS +--------------------------- + +Sleeping and waking on an event flagged in global data can be viewed as an +interaction between two pieces of data: the task state of the task waiting for +the event and the global data used to indicate the event. To make sure that +these appear to happen in the right order, the primitives to begin the process +of going to sleep, and the primitives to initiate a wake up imply certain +barriers. + +Firstly, the sleeper normally follows something like this sequence of events: + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (event_indicated) + break; + schedule(); + } + +A general memory barrier is interpolated automatically by set_current_state() +after it has altered the task state: + + CPU 1 + =============================== + set_current_state(); + set_mb(); + STORE current->state + <general barrier> + LOAD event_indicated + +set_current_state() may be wrapped by: + + prepare_to_wait(); + prepare_to_wait_exclusive(); + +which therefore also imply a general memory barrier after setting the state. +The whole sequence above is available in various canned forms, all of which +interpolate the memory barrier in the right place: + + wait_event(); + wait_event_interruptible(); + wait_event_interruptible_exclusive(); + wait_event_interruptible_timeout(); + wait_event_killable(); + wait_event_timeout(); + wait_on_bit(); + wait_on_bit_lock(); + + +Secondly, code that performs a wake up normally follows something like this: + + event_indicated = 1; + wake_up(&event_wait_queue); + +or: + + event_indicated = 1; + wake_up_process(event_daemon); + +A write memory barrier is implied by wake_up() and co. if and only if they wake +something up. The barrier occurs before the task state is cleared, and so sits +between the STORE to indicate the event and the STORE to set TASK_RUNNING: + + CPU 1 CPU 2 + =============================== =============================== + set_current_state(); STORE event_indicated + set_mb(); wake_up(); + STORE current->state <write barrier> + <general barrier> STORE current->state + LOAD event_indicated + +The available waker functions include: + + complete(); + wake_up(); + wake_up_all(); + wake_up_bit(); + wake_up_interruptible(); + wake_up_interruptible_all(); + wake_up_interruptible_nr(); + wake_up_interruptible_poll(); + wake_up_interruptible_sync(); + wake_up_interruptible_sync_poll(); + wake_up_locked(); + wake_up_locked_poll(); + wake_up_nr(); + wake_up_poll(); + wake_up_process(); + + +[!] Note that the memory barriers implied by the sleeper and the waker do _not_ +order multiple stores before the wake-up with respect to loads of those stored +values after the sleeper has called set_current_state(). For instance, if the +sleeper does: + + set_current_state(TASK_INTERRUPTIBLE); + if (event_indicated) + break; + __set_current_state(TASK_RUNNING); + do_something(my_data); + +and the waker does: + + my_data = value; + event_indicated = 1; + wake_up(&event_wait_queue); + +there's no guarantee that the change to event_indicated will be perceived by +the sleeper as coming after the change to my_data. In such a circumstance, the +code on both sides must interpolate its own memory barriers between the +separate data accesses. Thus the above sleeper ought to do: + + set_current_state(TASK_INTERRUPTIBLE); + if (event_indicated) { + smp_rmb(); + do_something(my_data); + } + +and the waker should do: + + my_data = value; + smp_wmb(); + event_indicated = 1; + wake_up(&event_wait_queue); + + MISCELLANEOUS FUNCTIONS ----------------------- @@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED? Under normal operation, memory operation reordering is generally not going to be a problem as a single-threaded linear piece of code will still appear to -work correctly, even if it's in an SMP kernel. There are, however, three +work correctly, even if it's in an SMP kernel. There are, however, four circumstances in which reordering definitely _could_ be a problem: (*) Interprocessor interaction. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ec5de02f543f..b121c5db707f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1266,13 +1266,22 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max sctp_wmem - vector of 3 INTEGERs: min, default, max See tcp_wmem for a description. -UNDOCUMENTED: /proc/sys/net/core/* - dev_weight FIXME +dev_weight - INTEGER + The maximum number of packets that kernel can handle on a NAPI + interrupt, it's a Per-CPU variable. + + Default: 64 /proc/sys/net/unix/* - max_dgram_qlen FIXME +max_dgram_qlen - INTEGER + The maximum length of dgram socket receive queue + + Default: 10 + + +UNDOCUMENTED: /proc/sys/net/irda/* fast_poll_increase FIXME diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 5ba4d3fc625a..1df7f9cdab05 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt @@ -4,6 +4,7 @@ CONTENTS ======== +0. WARNING 1. Overview 1.1 The problem 1.2 The solution @@ -14,6 +15,23 @@ CONTENTS 3. Future plans +0. WARNING +========== + + Fiddling with these settings can result in an unstable system, the knobs are + root only and assumes root knows what he is doing. + +Most notable: + + * very small values in sched_rt_period_us can result in an unstable + system when the period is smaller than either the available hrtimer + resolution, or the time it takes to handle the budget refresh itself. + + * very small values in sched_rt_runtime_us can result in an unstable + system when the runtime is so small the system has difficulty making + forward progress (NOTE: the migration thread and kstopmachine both + are real-time processes). + 1. Overview =========== @@ -169,7 +187,7 @@ get their allocated time. Implementing SCHED_EDF might take a while to complete. Priority Inheritance is the biggest challenge as the current linux PI infrastructure is geared towards -the limited static priority levels 0-139. With deadline scheduling you need to +the limited static priority levels 0-99. With deadline scheduling you need to do deadline inheritance (since priority is inversely proportional to the deadline delta (deadline - now). diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 8eec05bc079e..322869fc8a9e 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -334,6 +334,7 @@ STAC9227/9228/9229/927x ref-no-jd Reference board without HP/Mic jack detection 3stack D965 3stack 5stack D965 5stack + SPDIF + 5stack-no-fp D965 5stack without front panel dell-3stack Dell Dimension E520 dell-bios Fixes with Dell BIOS setup auto BIOS setup (default) diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt index bba2dbb79d81..cfac20cf9e33 100644 --- a/Documentation/sound/alsa/Procfile.txt +++ b/Documentation/sound/alsa/Procfile.txt @@ -104,6 +104,11 @@ card*/pcm*/xrun_debug When this value is greater than 1, the driver will show the stack trace additionally. This may help the debugging. + Since 2.6.30, this option also enables the hwptr check using + jiffies. This detects spontaneous invalid pointer callback + values, but can be lead to too much corrections for a (mostly + buggy) hardware that doesn't give smooth pointer updates. + card*/pcm*/sub*/info The general information of this PCM sub-stream. diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 5ad2ded8aa63..2a82d8602944 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -518,9 +518,18 @@ priority with zero (0) being the highest priority and the nice values starting at 100 (nice -20). Below is a quick chart to map the kernel priority to user land priorities. - Kernel priority: 0 to 99 ==> user RT priority 99 to 0 - Kernel priority: 100 to 139 ==> user nice -20 to 19 - Kernel priority: 140 ==> idle task priority + Kernel Space User Space + =============================================================== + 0(high) to 98(low) user RT priority 99(high) to 1(low) + with SCHED_RR or SCHED_FIFO + --------------------------------------------------------------- + 99 sched_priority is not used in scheduling + decisions(it must be specified as 0) + --------------------------------------------------------------- + 100(high) to 139(low) user nice -20(high) to 19(low) + --------------------------------------------------------------- + 140 idle task priority + --------------------------------------------------------------- The task states are: diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index e0203662f9e9..8da3a795083f 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -50,6 +50,10 @@ Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical pointer to single linked list of struct setup_data. +Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment + beyond the kernel_alignment added, new init_size and + pref_address fields. Added extended boot loader IDs. + **** MEMORY LAYOUT The traditional memory map for the kernel loader, used for Image or @@ -168,12 +172,13 @@ Offset Proto Name Meaning 021C/4 2.00+ ramdisk_size initrd size (set by boot loader) 0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only 0224/2 2.01+ heap_end_ptr Free memory after setup end -0226/2 N/A pad1 Unused +0226/1 2.02+(3 ext_loader_ver Extended boot loader version +0227/1 2.02+(3 ext_loader_type Extended boot loader ID 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line 022C/4 2.03+ ramdisk_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not -0235/1 N/A pad2 Unused +0235/1 2.10+ min_alignment Minimum alignment, as a power of two 0236/2 N/A pad3 Unused 0238/4 2.06+ cmdline_size Maximum size of the kernel command line 023C/4 2.07+ hardware_subarch Hardware subarchitecture @@ -182,6 +187,8 @@ Offset Proto Name Meaning 024C/4 2.08+ payload_length Length of kernel payload 0250/8 2.09+ setup_data 64-bit physical pointer to linked list of struct setup_data +0258/8 2.10+ pref_address Preferred loading address +0260/4 2.10+ init_size Linear memory required during initialization (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -190,6 +197,8 @@ Offset Proto Name Meaning field are unusable, which means the size of a bzImage kernel cannot be determined. +(3) Ignored, but safe to set, for boot protocols 2.02-2.09. + If the "HdrS" (0x53726448) magic number is not found at offset 0x202, the boot protocol version is "old". Loading an old kernel, the following parameters should be assumed: @@ -343,18 +352,32 @@ Protocol: 2.00+ 0xTV here, where T is an identifier for the boot loader and V is a version number. Otherwise, enter 0xFF here. + For boot loader IDs above T = 0xD, write T = 0xE to this field and + write the extended ID minus 0x10 to the ext_loader_type field. + Similarly, the ext_loader_ver field can be used to provide more than + four bits for the bootloader version. + + For example, for T = 0x15, V = 0x234, write: + + type_of_loader <- 0xE4 + ext_loader_type <- 0x05 + ext_loader_ver <- 0x23 + Assigned boot loader ids: 0 LILO (0x00 reserved for pre-2.00 bootloader) 1 Loadlin 2 bootsect-loader (0x20, all other values reserved) - 3 SYSLINUX - 4 EtherBoot + 3 Syslinux + 4 Etherboot/gPXE 5 ELILO 7 GRUB - 8 U-BOOT + 8 U-Boot 9 Xen A Gujin B Qemu + C Arcturus Networks uCbootloader + E Extended (see ext_loader_type) + F Special (0xFF = undefined) Please contact <hpa@zytor.com> if you need a bootloader ID value assigned. @@ -453,6 +476,35 @@ Protocol: 2.01+ Set this field to the offset (from the beginning of the real-mode code) of the end of the setup stack/heap, minus 0x0200. +Field name: ext_loader_ver +Type: write (optional) +Offset/size: 0x226/1 +Protocol: 2.02+ + + This field is used as an extension of the version number in the + type_of_loader field. The total version number is considered to be + (type_of_loader & 0x0f) + (ext_loader_ver << 4). + + The use of this field is boot loader specific. If not written, it + is zero. + + Kernels prior to 2.6.31 did not recognize this field, but it is safe + to write for protocol version 2.02 or higher. + +Field name: ext_loader_type +Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0) +Offset/size: 0x227/1 +Protocol: 2.02+ + + This field is used as an extension of the type number in + type_of_loader field. If the type in type_of_loader is 0xE, then + the actual type is (ext_loader_type + 0x10). + + This field is ignored if the type in type_of_loader is not 0xE. + + Kernels prior to 2.6.31 did not recognize this field, but it is safe + to write for protocol version 2.02 or higher. + Field name: cmd_line_ptr Type: write (obligatory) Offset/size: 0x228/4 @@ -482,11 +534,19 @@ Protocol: 2.03+ 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) Field name: kernel_alignment -Type: read (reloc) +Type: read/modify (reloc) Offset/size: 0x230/4 -Protocol: 2.05+ +Protocol: 2.05+ (read), 2.10+ (modify) + + Alignment unit required by the kernel (if relocatable_kernel is + true.) A relocatable kernel that is loaded at an alignment + incompatible with the value in this field will be realigned during + kernel initialization. - Alignment unit required by the kernel (if relocatable_kernel is true.) + Starting with protocol version 2.10, this reflects the kernel + alignment preferred for optimal performance; it is possible for the + loader to modify this field to permit a lesser alignment. See the + min_alignment and pref_address field below. Field name: relocatable_kernel Type: read (reloc) @@ -498,6 +558,22 @@ Protocol: 2.05+ After loading, the boot loader must set the code32_start field to point to the loaded code, or to a boot loader hook. +Field name: min_alignment +Type: read (reloc) +Offset/size: 0x235/1 +Protocol: 2.10+ + + This field, if nonzero, indicates as a power of two the minimum + alignment required, as opposed to preferred, by the kernel to boot. + If a boot loader makes use of this field, it should update the + kernel_alignment field with the alignment unit desired; typically: + + kernel_alignment = 1 << min_alignment + + There may be a considerable performance cost with an excessively + misaligned kernel. Therefore, a loader should typically try each + power-of-two alignment from kernel_alignment down to this alignment. + Field name: cmdline_size Type: read Offset/size: 0x238/4 @@ -582,6 +658,36 @@ Protocol: 2.09+ sure to consider the case where the linked list already contains entries. +Field name: pref_address +Type: read (reloc) +Offset/size: 0x258/8 +Protocol: 2.10+ + + This field, if nonzero, represents a preferred load address for the + kernel. A relocating bootloader should attempt to load at this + address if possible. + + A non-relocatable kernel will unconditionally move itself and to run + at this address. + +Field name: init_size +Type: read +Offset/size: 0x25c/4 + + This field indicates the amount of linear contiguous memory starting + at the kernel runtime start address that the kernel needs before it + is capable of examining its memory map. This is not the same thing + as the total amount of memory the kernel needs to boot, but it can + be used by a relocating boot loader to help select a safe load + address for the kernel. + + The kernel runtime start address is determined by the following algorithm: + + if (relocatable_kernel) + runtime_start = align_up(load_address, kernel_alignment) + else + runtime_start = pref_address + **** THE IMAGE CHECKSUM diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 34c13040a718..2db5893d6c97 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -150,11 +150,6 @@ NUMA Otherwise, the remaining system RAM is allocated to an additional node. - numa=hotadd=percent - Only allow hotadd memory to preallocate page structures upto - percent of already available memory. - numa=hotadd=0 will disable hotadd memory. - ACPI acpi=off Don't enable ACPI diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 29b52b14d0b4..d6498e3cd713 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole -ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory -ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole -ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space -ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB) +ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory +ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole +ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space +ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole +ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space diff --git a/MAINTAINERS b/MAINTAINERS index 2b349ba4add4..cf4abddfc8a4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -434,7 +434,7 @@ F: arch/alpha/ AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER P: Thomas Dahlmann -M: thomas.dahlmann@amd.com +M: dahlmann.thomas@arcor.de L: linux-geode@lists.infradead.org (moderated for non-subscribers) S: Supported F: drivers/usb/gadget/amd5536udc.* @@ -624,6 +624,7 @@ M: paulius.zaleckas@teltonika.lt L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) T: git git://gitorious.org/linux-gemini/mainline.git S: Maintained +F: arch/arm/mach-gemini/ ARM/EBSA110 MACHINE SUPPORT P: Russell King @@ -650,6 +651,7 @@ P: Paulius Zaleckas M: paulius.zaleckas@teltonika.lt L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) S: Maintained +F: arch/arm/mm/*-fa* ARM/FOOTBRIDGE ARCHITECTURE P: Russell King @@ -1132,17 +1134,17 @@ F: fs/bfs/ F: include/linux/bfs_fs.h BLACKFIN ARCHITECTURE -P: Bryan Wu -M: cooloney@kernel.org +P: Mike Frysinger +M: vapier@gentoo.org L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: arch/blackfin/ BLACKFIN EMAC DRIVER -P: Bryan Wu -M: cooloney@kernel.org -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +P: Michael Hennerich +M: michael.hennerich@analog.com +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/net/bfin_mac.* @@ -1150,7 +1152,7 @@ F: drivers/net/bfin_mac.* BLACKFIN RTC DRIVER P: Mike Frysinger M: vapier.adi@gmail.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/rtc/rtc-bfin.c @@ -1158,7 +1160,7 @@ F: drivers/rtc/rtc-bfin.c BLACKFIN SERIAL DRIVER P: Sonic Zhang M: sonic.zhang@analog.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/serial/bfin_5xx.c @@ -1166,7 +1168,7 @@ F: drivers/serial/bfin_5xx.c BLACKFIN WATCHDOG DRIVER P: Mike Frysinger M: vapier.adi@gmail.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org S: Supported F: drivers/watchdog/bfin_wdt.c @@ -1174,7 +1176,7 @@ F: drivers/watchdog/bfin_wdt.c BLACKFIN I2C TWI DRIVER P: Sonic Zhang M: sonic.zhang@analog.com -L: uclinux-dist-devel@blackfin.uclinux.org (subscribers-only) +L: uclinux-dist-devel@blackfin.uclinux.org W: http://blackfin.uclinux.org/ S: Supported F: drivers/i2c/busses/i2c-bfin-twi.c @@ -1431,6 +1433,14 @@ P: Russell King M: linux@arm.linux.org.uk F: include/linux/clk.h +CISCO FCOE HBA DRIVER +P: Abhijeet Joglekar +M: abjoglek@cisco.com +P: Joe Eykholt +M: jeykholt@cisco.com +L: linux-scsi@vger.kernel.org +S: Supported + CODA FILE SYSTEM P: Jan Harkes M: jaharkes@cs.cmu.edu @@ -1532,6 +1542,13 @@ W: http://www.fi.muni.cz/~kas/cosa/ S: Maintained F: drivers/net/wan/cosa* +CPMAC ETHERNET DRIVER +P: Florian Fainelli +M: florian@openwrt.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/cpmac.c + CPU FREQUENCY DRIVERS P: Dave Jones M: davej@redhat.com @@ -1963,8 +1980,8 @@ F: include/linux/edac.h EDAC-E752X P: Mark Gross -P: Doug Thompson M: mark.gross@intel.com +P: Doug Thompson M: dougthompson@xmission.com L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net @@ -2241,7 +2258,7 @@ P: Li Yang M: leoli@freescale.com P: Zhang Wei M: zw@zh-kernel.org -L: linuxppc-embedded@ozlabs.org +L: linuxppc-dev@ozlabs.org L: linux-kernel@vger.kernel.org S: Maintained F: drivers/dma/fsldma.* @@ -5579,6 +5596,14 @@ M: ian@mnementh.co.uk S: Maintained F: drivers/mmc/host/tmio_mmc.* +TMPFS (SHMEM FILESYSTEM) +P: Hugh Dickins +M: hugh.dickins@tiscali.co.uk +L: linux-mm@kvack.org +S: Maintained +F: include/linux/shmem_fs.h +F: mm/shmem.c + TPM DEVICE DRIVER P: Debora Velarde M: debora@linux.vnet.ibm.com @@ -1,8 +1,8 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 30 -EXTRAVERSION = -rc6 -NAME = Vindictive Armadillo +EXTRAVERSION = +NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -533,7 +533,7 @@ endif include $(srctree)/arch/$(SRCARCH)/Makefile -ifneq (CONFIG_FRAME_WARN,0) +ifneq ($(CONFIG_FRAME_WARN),0) KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) endif diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 9c9d1fd4155f..5bd5259324b7 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } } -static void +static int dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } -static void +static int clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq - 16, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } static struct hw_interrupt_type dp264_irq_type = { diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 27f840a4ad3d..8dd239ebdb9e 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } -static void +static int titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&titan_irq_lock); titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); spin_unlock(&titan_irq_lock); + + return 0; } static void diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e60ec54df334..9d02cdb15b23 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -273,6 +273,7 @@ config ARCH_EP93XX select HAVE_CLK select COMMON_CLKDEV select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_HOLES_MEMORYMODEL help This enables support for the Cirrus EP93xx series of CPUs. @@ -976,10 +977,9 @@ config OABI_COMPAT UNPREDICTABLE (in fact it can be predicted that it won't work at all). If in doubt say Y. -config ARCH_FLATMEM_HAS_HOLES +config ARCH_HAS_HOLES_MEMORYMODEL bool - default y - depends on FLATMEM + default n # Discontigmem is deprecated config ARCH_DISCONTIGMEM_ENABLE diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index c6884ba1d5ed..664c7b8b1ba8 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) +static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) { void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); unsigned int shift = (irq % 4) * 8; @@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) val |= 1 << (cpu + shift); writel(val, reg); spin_unlock(&irq_controller_lock); + + return 0; } #endif @@ -253,9 +255,9 @@ void __cpuinit gic_cpu_init(unsigned int gic_nr, void __iomem *base) } #ifdef CONFIG_SMP -void gic_raise_softirq(cpumask_t cpumask, unsigned int irq) +void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { - unsigned long map = *cpus_addr(cpumask); + unsigned long map = *cpus_addr(*mask); /* this always happens on GIC0 */ writel(map << 16 | irq, gic_data[0].dist_base + GIC_DIST_SOFTINT); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6116e4893c0a..15f8a092b700 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -114,3 +114,16 @@ .align 3; \ .long 9999b,9001f; \ .previous + +/* + * SMP data memory barrier + */ + .macro smp_dmb +#ifdef CONFIG_SMP +#if __LINUX_ARM_ARCH__ >= 7 + dmb +#elif __LINUX_ARM_ARCH__ == 6 + mcr p15, 0, r0, c7, c10, 5 @ dmb +#endif +#endif + .endm diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index ee99723b3a6c..16b52f397983 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i) : "cc"); } +static inline void atomic_add(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_add\n" +"1: ldrex %0, [%2]\n" +" add %0, %0, %3\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + static inline int atomic_add_return(int i, atomic_t *v) { unsigned long tmp; int result; + smp_mb(); + __asm__ __volatile__("@ atomic_add_return\n" "1: ldrex %0, [%2]\n" " add %0, %0, %3\n" @@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v) : "r" (&v->counter), "Ir" (i) : "cc"); + smp_mb(); + return result; } +static inline void atomic_sub(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_sub\n" +"1: ldrex %0, [%2]\n" +" sub %0, %0, %3\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + static inline int atomic_sub_return(int i, atomic_t *v) { unsigned long tmp; int result; + smp_mb(); + __asm__ __volatile__("@ atomic_sub_return\n" "1: ldrex %0, [%2]\n" " sub %0, %0, %3\n" @@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "r" (&v->counter), "Ir" (i) : "cc"); + smp_mb(); + return result; } @@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) { unsigned long oldval, res; + smp_mb(); + do { __asm__ __volatile__("@ atomic_cmpxchg\n" "ldrex %1, [%2]\n" @@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) : "cc"); } while (res); + smp_mb(); + return oldval; } @@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } +#define atomic_add(i, v) (void) atomic_add_return(i, v) static inline int atomic_sub_return(int i, atomic_t *v) { @@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) return val; } +#define atomic_sub(i, v) (void) atomic_sub_return(i, v) static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#define atomic_add(i, v) (void) atomic_add_return(i, v) -#define atomic_inc(v) (void) atomic_add_return(1, v) -#define atomic_sub(i, v) (void) atomic_sub_return(i, v) -#define atomic_dec(v) (void) atomic_sub_return(1, v) +#define atomic_inc(v) atomic_add(1, v) +#define atomic_dec(v) atomic_sub(1, v) #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) @@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) -/* Atomic operations are already serializing on ARM */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() #include <asm-generic/atomic.h> #endif diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index cb7a9e97fd7e..feaa75f0013e 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -7,4 +7,20 @@ #define L1_CACHE_SHIFT 5 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +/* + * Memory returned by kmalloc() may be used for DMA, so we must make + * sure that all such allocations are cache aligned. Otherwise, + * unrelated code may cause parts of the buffer to be read into the + * cache before the transfer is done, causing old data to be seen by + * the CPU. + */ +#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES + +/* + * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers. + */ +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +#define ARCH_SLAB_MINALIGN 8 +#endif + #endif diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h index 1d77e51907f6..59426a4595c9 100644 --- a/arch/arm/include/asm/flat.h +++ b/arch/arm/include/asm/flat.h @@ -5,9 +5,6 @@ #ifndef __ARM_FLAT_H__ #define __ARM_FLAT_H__ -/* An odd number of words will be pushed after this alignment, so - deliberately misalign the value. */ -#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4) #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h index 4924914af188..7f34333bb545 100644 --- a/arch/arm/include/asm/hardware/gic.h +++ b/arch/arm/include/asm/hardware/gic.h @@ -36,7 +36,7 @@ void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start); void gic_cpu_init(unsigned int gic_nr, void __iomem *base); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -void gic_raise_softirq(cpumask_t cpumask, unsigned int irq); +void gic_raise_softirq(const struct cpumask *mask, unsigned int irq); #endif #endif diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index e6eb8a67b807..7b522770f29d 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -202,13 +202,6 @@ typedef struct page *pgtable_t; (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -/* - * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers. - */ -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) -#define ARCH_SLAB_MINALIGN 8 -#endif - #include <asm-generic/page.h> #endif diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index fad70da5911d..5995935338e1 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -53,17 +53,12 @@ extern void smp_store_cpu_info(unsigned int cpuid); /* * Raise an IPI cross call on CPUs in callmap. */ -extern void smp_cross_call(cpumask_t callmap); - -/* - * Broadcast a timer interrupt to the other CPUs. - */ -extern void smp_send_timer(void); +extern void smp_cross_call(const struct cpumask *mask); /* * Broadcast a clock event to other CPUs. */ -extern void smp_timer_broadcast(cpumask_t mask); +extern void smp_timer_broadcast(const struct cpumask *mask); /* * Boot a secondary CPU, and assign it the specified idle task. @@ -102,7 +97,8 @@ extern int platform_cpu_kill(unsigned int cpu); extern void platform_cpu_enable(unsigned int cpu); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask /* * Local timer interrupt handling function (can be IPI'ed). diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index bd4dc8ed53d5..d65b2f5bf41f 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size unsigned int tmp; #endif + smp_mb(); + switch (size) { #if __LINUX_ARM_ARCH__ >= 6 case 1: @@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size __bad_xchg(ptr, size), ret = 0; break; } + smp_mb(); return ret; } @@ -316,6 +319,12 @@ extern void enable_hlt(void); #include <asm-generic/cmpxchg-local.h> +#if __LINUX_ARM_ARCH__ < 6 + +#ifdef CONFIG_SMP +#error "SMP is not supported on this platform" +#endif + /* * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make * them available. @@ -329,6 +338,173 @@ extern void enable_hlt(void); #include <asm-generic/cmpxchg.h> #endif +#else /* __LINUX_ARM_ARCH__ >= 6 */ + +extern void __bad_cmpxchg(volatile void *ptr, int size); + +/* + * cmpxchg only support 32-bits operands on ARMv6. + */ + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long oldval, res; + + switch (size) { +#ifdef CONFIG_CPU_32v6K + case 1: + do { + asm volatile("@ __cmpxchg1\n" + " ldrexb %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexbeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; + case 2: + do { + asm volatile("@ __cmpxchg1\n" + " ldrexh %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexheq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; +#endif /* CONFIG_CPU_32v6K */ + case 4: + do { + asm volatile("@ __cmpxchg4\n" + " ldrex %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; + default: + __bad_cmpxchg(ptr, size); + oldval = 0; + } + + return oldval; +} + +static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + smp_mb(); + ret = __cmpxchg(ptr, old, new, size); + smp_mb(); + + return ret; +} + +#define cmpxchg(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + switch (size) { +#ifndef CONFIG_CPU_32v6K + case 1: + case 2: + ret = __cmpxchg_local_generic(ptr, old, new, size); + break; +#endif /* !CONFIG_CPU_32v6K */ + default: + ret = __cmpxchg(ptr, old, new, size); + } + + return ret; +} + +#define cmpxchg_local(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +#ifdef CONFIG_CPU_32v6K + +/* + * Note : ARMv7-M (currently unsupported by Linux) does not support + * ldrexd/strexd. If ARMv7-M is ever supported by the Linux kernel, it should + * not be allowed to use __cmpxchg64. + */ +static inline unsigned long long __cmpxchg64(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + register unsigned long long oldval asm("r0"); + register unsigned long long __old asm("r2") = old; + register unsigned long long __new asm("r4") = new; + unsigned long res; + + do { + asm volatile( + " @ __cmpxchg8\n" + " ldrexd %1, %H1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " teqeq %H1, %H3\n" + " strexdeq %0, %4, %H4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (__old), "r" (__new) + : "memory", "cc"); + } while (res); + + return oldval; +} + +static inline unsigned long long __cmpxchg64_mb(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long ret; + + smp_mb(); + ret = __cmpxchg64(ptr, old, new); + smp_mb(); + + return ret; +} + +#define cmpxchg64(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#define cmpxchg64_local(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#else /* !CONFIG_CPU_32v6K */ + +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) + +#endif /* CONFIG_CPU_32v6K */ + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index d4a0da1e48f4..950391f194c4 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -78,6 +78,15 @@ int arm_elf_read_implies_exec(const struct elf32_hdr *x, int executable_stack) return 1; if (cpu_architecture() < CPU_ARCH_ARMv6) return 1; +#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) + /* + * If we have support for OABI programs, we can never allow NX + * support - our signal syscall restart mechanism relies upon + * being able to execute code placed on the user stack. + */ + return 1; +#else return 0; +#endif } EXPORT_SYMBOL(arm_elf_read_implies_exec); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d662a2f1fd85..83b1da6b7baa 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -815,10 +815,7 @@ __kuser_helper_start: */ __kuser_memory_barrier: @ 0xffff0fa0 - -#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP) - mcr p15, 0, r0, c7, c10, 5 @ dmb -#endif + smp_dmb usr_ret lr .align 5 diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7801aac3c043..6014dfd22af4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -326,14 +326,14 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; } -static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg) +static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) { unsigned long flags; unsigned int cpu; local_irq_save(flags); - for_each_cpu_mask(cpu, callmap) { + for_each_cpu(cpu, mask) { struct ipi_data *ipi = &per_cpu(ipi_data, cpu); spin_lock(&ipi->lock); @@ -344,19 +344,19 @@ static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg) /* * Call the platform specific cross-CPU call function. */ - smp_cross_call(callmap); + smp_cross_call(mask); local_irq_restore(flags); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { send_ipi_message(mask, IPI_CALL_FUNC); } void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } void show_ipi_list(struct seq_file *p) @@ -498,17 +498,10 @@ asmlinkage void __exception do_IPI(struct pt_regs *regs) void smp_send_reschedule(int cpu) { - send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE); + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } -void smp_send_timer(void) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - send_ipi_message(mask, IPI_TIMER); -} - -void smp_timer_broadcast(cpumask_t mask) +void smp_timer_broadcast(const struct cpumask *mask) { send_ipi_message(mask, IPI_TIMER); } @@ -517,7 +510,7 @@ void smp_send_stop(void) { cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); - send_ipi_message(mask, IPI_CPU_STOP); + send_ipi_message(&mask, IPI_CPU_STOP); } /* @@ -528,20 +521,17 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -static int -on_each_cpu_mask(void (*func)(void *), void *info, int wait, cpumask_t mask) +static void +on_each_cpu_mask(void (*func)(void *), void *info, int wait, + const struct cpumask *mask) { - int ret = 0; - preempt_disable(); - ret = smp_call_function_mask(mask, func, info, wait); - if (cpu_isset(smp_processor_id(), mask)) + smp_call_function_many(mask, func, info, wait); + if (cpumask_test_cpu(smp_processor_id(), mask)) func(info); preempt_enable(); - - return ret; } /**********************************************************************/ @@ -602,20 +592,17 @@ void flush_tlb_all(void) void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t mask = mm->cpu_vm_mask; - - on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mask); + on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - cpumask_t mask = vma->vm_mm->cpu_vm_mask; struct tlb_args ta; ta.ta_vma = vma; ta.ta_start = uaddr; - on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mask); + on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask); } void flush_tlb_kernel_page(unsigned long kaddr) @@ -630,14 +617,13 @@ void flush_tlb_kernel_page(unsigned long kaddr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - cpumask_t mask = vma->vm_mm->cpu_vm_mask; struct tlb_args ta; ta.ta_vma = vma; ta.ta_start = start; ta.ta_end = end; - on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mask); + on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 2e787d40d599..c7f2627385e7 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -18,12 +18,14 @@ mov r2, #1 add r1, r1, r0, lsr #3 @ Get byte offset mov r3, r2, lsl r3 @ create mask + smp_dmb 1: ldrexb r2, [r1] ands r0, r2, r3 @ save old value of bit \instr r2, r2, r3 @ toggle bit strexb ip, r2, [r1] cmp ip, #0 bne 1b + smp_dmb cmp r0, #0 movne r0, #1 2: mov pc, lr diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index e8ebeaea6c48..b2eede5531c8 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -21,15 +21,50 @@ #include <asm/div64.h> #include <mach/hardware.h> + +/* + * The EP93xx has two external crystal oscillators. To generate the + * required high-frequency clocks, the processor uses two phase-locked- + * loops (PLLs) to multiply the incoming external clock signal to much + * higher frequencies that are then divided down by programmable dividers + * to produce the needed clocks. The PLLs operate independently of one + * another. + */ +#define EP93XX_EXT_CLK_RATE 14745600 +#define EP93XX_EXT_RTC_RATE 32768 + + struct clk { unsigned long rate; int users; + int sw_locked; u32 enable_reg; u32 enable_mask; + + unsigned long (*get_rate)(struct clk *clk); }; -static struct clk clk_uart = { - .rate = 14745600, + +static unsigned long get_uart_rate(struct clk *clk); + + +static struct clk clk_uart1 = { + .sw_locked = 1, + .enable_reg = EP93XX_SYSCON_DEVICE_CONFIG, + .enable_mask = EP93XX_SYSCON_DEVICE_CONFIG_U1EN, + .get_rate = get_uart_rate, +}; +static struct clk clk_uart2 = { + .sw_locked = 1, + .enable_reg = EP93XX_SYSCON_DEVICE_CONFIG, + .enable_mask = EP93XX_SYSCON_DEVICE_CONFIG_U2EN, + .get_rate = get_uart_rate, +}; +static struct clk clk_uart3 = { + .sw_locked = 1, + .enable_reg = EP93XX_SYSCON_DEVICE_CONFIG, + .enable_mask = EP93XX_SYSCON_DEVICE_CONFIG_U3EN, + .get_rate = get_uart_rate, }; static struct clk clk_pll1; static struct clk clk_f; @@ -95,9 +130,9 @@ static struct clk clk_m2m1 = { { .dev_id = dev, .con_id = con, .clk = ck } static struct clk_lookup clocks[] = { - INIT_CK("apb:uart1", NULL, &clk_uart), - INIT_CK("apb:uart2", NULL, &clk_uart), - INIT_CK("apb:uart3", NULL, &clk_uart), + INIT_CK("apb:uart1", NULL, &clk_uart1), + INIT_CK("apb:uart2", NULL, &clk_uart2), + INIT_CK("apb:uart3", NULL, &clk_uart3), INIT_CK(NULL, "pll1", &clk_pll1), INIT_CK(NULL, "fclk", &clk_f), INIT_CK(NULL, "hclk", &clk_h), @@ -125,6 +160,8 @@ int clk_enable(struct clk *clk) u32 value; value = __raw_readl(clk->enable_reg); + if (clk->sw_locked) + __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); __raw_writel(value | clk->enable_mask, clk->enable_reg); } @@ -138,13 +175,29 @@ void clk_disable(struct clk *clk) u32 value; value = __raw_readl(clk->enable_reg); + if (clk->sw_locked) + __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); __raw_writel(value & ~clk->enable_mask, clk->enable_reg); } } EXPORT_SYMBOL(clk_disable); +static unsigned long get_uart_rate(struct clk *clk) +{ + u32 value; + + value = __raw_readl(EP93XX_SYSCON_CLOCK_CONTROL); + if (value & EP93XX_SYSCON_CLOCK_UARTBAUD) + return EP93XX_EXT_CLK_RATE; + else + return EP93XX_EXT_CLK_RATE / 2; +} + unsigned long clk_get_rate(struct clk *clk) { + if (clk->get_rate) + return clk->get_rate(clk); + return clk->rate; } EXPORT_SYMBOL(clk_get_rate); @@ -162,7 +215,7 @@ static unsigned long calc_pll_rate(u32 config_word) unsigned long long rate; int i; - rate = 14745600; + rate = EP93XX_EXT_CLK_RATE; rate *= ((config_word >> 11) & 0x1f) + 1; /* X1FBD */ rate *= ((config_word >> 5) & 0x3f) + 1; /* X2FBD */ do_div(rate, (config_word & 0x1f) + 1); /* X2IPD */ @@ -195,7 +248,7 @@ static int __init ep93xx_clock_init(void) value = __raw_readl(EP93XX_SYSCON_CLOCK_SET1); if (!(value & 0x00800000)) { /* PLL1 bypassed? */ - clk_pll1.rate = 14745600; + clk_pll1.rate = EP93XX_EXT_CLK_RATE; } else { clk_pll1.rate = calc_pll_rate(value); } @@ -206,7 +259,7 @@ static int __init ep93xx_clock_init(void) value = __raw_readl(EP93XX_SYSCON_CLOCK_SET2); if (!(value & 0x00080000)) { /* PLL2 bypassed? */ - clk_pll2.rate = 14745600; + clk_pll2.rate = EP93XX_EXT_CLK_RATE; } else if (value & 0x00040000) { /* PLL2 enabled? */ clk_pll2.rate = calc_pll_rate(value); } else { diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h index f66be12b856e..1732de7629a5 100644 --- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h +++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h @@ -159,7 +159,10 @@ #define EP93XX_SYSCON_CLOCK_SET1 EP93XX_SYSCON_REG(0x20) #define EP93XX_SYSCON_CLOCK_SET2 EP93XX_SYSCON_REG(0x24) #define EP93XX_SYSCON_DEVICE_CONFIG EP93XX_SYSCON_REG(0x80) -#define EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE 0x00800000 +#define EP93XX_SYSCON_DEVICE_CONFIG_U3EN (1<<24) +#define EP93XX_SYSCON_DEVICE_CONFIG_CRUNCH_ENABLE (1<<23) +#define EP93XX_SYSCON_DEVICE_CONFIG_U2EN (1<<20) +#define EP93XX_SYSCON_DEVICE_CONFIG_U1EN (1<<18) #define EP93XX_SYSCON_SWLOCK EP93XX_SYSCON_REG(0xc0) #define EP93XX_WATCHDOG_BASE (EP93XX_APB_VIRT_BASE + 0x00140000) diff --git a/arch/arm/mach-gemini/include/mach/hardware.h b/arch/arm/mach-gemini/include/mach/hardware.h index de6752674c05..213a4fcfeb1c 100644 --- a/arch/arm/mach-gemini/include/mach/hardware.h +++ b/arch/arm/mach-gemini/include/mach/hardware.h @@ -15,10 +15,9 @@ /* * Memory Map definitions */ -/* FIXME: Does it really swap SRAM like this? */ #ifdef CONFIG_GEMINI_MEM_SWAP # define GEMINI_DRAM_BASE 0x00000000 -# define GEMINI_SRAM_BASE 0x20000000 +# define GEMINI_SRAM_BASE 0x70000000 #else # define GEMINI_SRAM_BASE 0x00000000 # define GEMINI_DRAM_BASE 0x10000000 diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index 6f8872913073..a0f60e55da6a 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -121,7 +121,7 @@ static struct clk uartclk = { .rate = 14745600, }; -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { { /* UART0 */ .dev_id = "mb:16", .clk = &uartclk, diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index eeb00240d784..be1ca28fed3f 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -144,6 +144,9 @@ static struct platform_device kirkwood_ge00 = { .id = 0, .num_resources = 1, .resource = kirkwood_ge00_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data) @@ -202,6 +205,9 @@ static struct platform_device kirkwood_ge01 = { .id = 1, .num_resources = 1, .resource = kirkwood_ge01_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data) @@ -386,12 +392,10 @@ static struct mv64xxx_i2c_pdata kirkwood_i2c_pdata = { static struct resource kirkwood_i2c_resources[] = { { - .name = "i2c", .start = I2C_PHYS_BASE, .end = I2C_PHYS_BASE + 0x1f, .flags = IORESOURCE_MEM, }, { - .name = "i2c", .start = IRQ_KIRKWOOD_TWSI, .end = IRQ_KIRKWOOD_TWSI, .flags = IORESOURCE_IRQ, diff --git a/arch/arm/mach-kirkwood/ts219-setup.c b/arch/arm/mach-kirkwood/ts219-setup.c index dda5743cf3e0..01aa213c0a6f 100644 --- a/arch/arm/mach-kirkwood/ts219-setup.c +++ b/arch/arm/mach-kirkwood/ts219-setup.c @@ -142,6 +142,8 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = { MPP1_SPI_MOSI, MPP2_SPI_SCK, MPP3_SPI_MISO, + MPP4_SATA1_ACTn, + MPP5_SATA0_ACTn, MPP8_TW_SDA, MPP9_TW_SCK, MPP10_UART0_TXD, @@ -150,10 +152,6 @@ static unsigned int qnap_ts219_mpp_config[] __initdata = { MPP14_UART1_RXD, /* PIC controller */ MPP15_GPIO, /* USB Copy button */ MPP16_GPIO, /* Reset button */ - MPP20_SATA1_ACTn, - MPP21_SATA0_ACTn, - MPP22_SATA1_PRESENTn, - MPP23_SATA0_PRESENTn, 0 }; diff --git a/arch/arm/mach-l7200/include/mach/sys-clock.h b/arch/arm/mach-l7200/include/mach/sys-clock.h index 2d7722be60ea..e9729a35751d 100644 --- a/arch/arm/mach-l7200/include/mach/sys-clock.h +++ b/arch/arm/mach-l7200/include/mach/sys-clock.h @@ -18,7 +18,7 @@ /* IO_START and IO_BASE are defined in hardware.h */ -#define SYS_CLOCK_START (IO_START + SYS_CLCOK_OFF) /* Physical address */ +#define SYS_CLOCK_START (IO_START + SYS_CLOCK_OFF) /* Physical address */ #define SYS_CLOCK_BASE (IO_BASE + SYS_CLOCK_OFF) /* Virtual address */ /* Define the interface to the SYS_CLOCK */ diff --git a/arch/arm/mach-loki/common.c b/arch/arm/mach-loki/common.c index c0d2d9d12e74..818f19d7ab1f 100644 --- a/arch/arm/mach-loki/common.c +++ b/arch/arm/mach-loki/common.c @@ -82,6 +82,9 @@ static struct platform_device loki_ge0 = { .id = 0, .num_resources = 1, .resource = loki_ge0_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init loki_ge0_init(struct mv643xx_eth_platform_data *eth_data) @@ -136,6 +139,9 @@ static struct platform_device loki_ge1 = { .id = 1, .num_resources = 1, .resource = loki_ge1_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init loki_ge1_init(struct mv643xx_eth_platform_data *eth_data) diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h index d0bdb6e3682b..2e914649b9e4 100644 --- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h +++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h @@ -3,6 +3,11 @@ #include <mach/mfp.h> +#define MFP_DRIVE_VERY_SLOW (0x0 << 13) +#define MFP_DRIVE_SLOW (0x1 << 13) +#define MFP_DRIVE_MEDIUM (0x2 << 13) +#define MFP_DRIVE_FAST (0x3 << 13) + /* GPIO */ #define GPIO0_GPIO MFP_CFG(GPIO0, AF5) #define GPIO1_GPIO MFP_CFG(GPIO1, AF5) diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h index 48a1cbc7c56b..d97de36c50ad 100644 --- a/arch/arm/mach-mmp/include/mach/mfp-pxa910.h +++ b/arch/arm/mach-mmp/include/mach/mfp-pxa910.h @@ -3,6 +3,11 @@ #include <mach/mfp.h> +#define MFP_DRIVE_VERY_SLOW (0x0 << 13) +#define MFP_DRIVE_SLOW (0x2 << 13) +#define MFP_DRIVE_MEDIUM (0x4 << 13) +#define MFP_DRIVE_FAST (0x8 << 13) + /* UART2 */ #define GPIO47_UART2_RXD MFP_CFG(GPIO47, AF6) #define GPIO48_UART2_TXD MFP_CFG(GPIO48, AF6) diff --git a/arch/arm/mach-mmp/include/mach/mfp.h b/arch/arm/mach-mmp/include/mach/mfp.h index 277ea4cd0f9f..62e510e80a58 100644 --- a/arch/arm/mach-mmp/include/mach/mfp.h +++ b/arch/arm/mach-mmp/include/mach/mfp.h @@ -12,16 +12,13 @@ * possible, we make the following compromise: * * 1. SLEEP_OE_N will always be programmed to '1' (by MFP_LPM_FLOAT) - * 2. DRIVE strength definitions redefined to include the reserved bit10 + * 2. DRIVE strength definitions redefined to include the reserved bit + * - the reserved bit differs between pxa168 and pxa910, and the + * MFP_DRIVE_* macros are individually defined in mfp-pxa{168,910}.h * 3. Override MFP_CFG() and MFP_CFG_DRV() * 4. Drop the use of MFP_CFG_LPM() and MFP_CFG_X() */ -#define MFP_DRIVE_VERY_SLOW (0x0 << 13) -#define MFP_DRIVE_SLOW (0x2 << 13) -#define MFP_DRIVE_MEDIUM (0x4 << 13) -#define MFP_DRIVE_FAST (0x8 << 13) - #undef MFP_CFG #undef MFP_CFG_DRV #undef MFP_CFG_LPM diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index b03a6eda7419..a8400bb891e7 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -136,7 +136,7 @@ static struct clock_event_device ckevt = { .set_mode = timer_set_mode, }; -static cycle_t clksrc_read(void) +static cycle_t clksrc_read(struct clocksource *cs) { return timer_read(); } diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 9ba595083dab..1b22e4af8791 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -321,6 +321,9 @@ static struct platform_device mv78xx0_ge00 = { .id = 0, .num_resources = 1, .resource = mv78xx0_ge00_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data) @@ -375,6 +378,9 @@ static struct platform_device mv78xx0_ge01 = { .id = 1, .num_resources = 1, .resource = mv78xx0_ge01_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data) @@ -429,6 +435,9 @@ static struct platform_device mv78xx0_ge10 = { .id = 2, .num_resources = 1, .resource = mv78xx0_ge10_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data) @@ -496,6 +505,9 @@ static struct platform_device mv78xx0_ge11 = { .id = 3, .num_resources = 1, .resource = mv78xx0_ge11_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data) @@ -532,12 +544,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_0_pdata = { static struct resource mv78xx0_i2c_0_resources[] = { { - .name = "i2c 0 base", .start = I2C_0_PHYS_BASE, .end = I2C_0_PHYS_BASE + 0x1f, .flags = IORESOURCE_MEM, }, { - .name = "i2c 0 irq", .start = IRQ_MV78XX0_I2C_0, .end = IRQ_MV78XX0_I2C_0, .flags = IORESOURCE_IRQ, @@ -567,12 +577,10 @@ static struct mv64xxx_i2c_pdata mv78xx0_i2c_1_pdata = { static struct resource mv78xx0_i2c_1_resources[] = { { - .name = "i2c 1 base", .start = I2C_1_PHYS_BASE, .end = I2C_1_PHYS_BASE + 0x1f, .flags = IORESOURCE_MEM, }, { - .name = "i2c 1 irq", .start = IRQ_MV78XX0_I2C_1, .end = IRQ_MV78XX0_I2C_1, .flags = IORESOURCE_IRQ, diff --git a/arch/arm/mach-mx2/clock_imx21.c b/arch/arm/mach-mx2/clock_imx21.c index 999d013e06e3..e4b08ca804ea 100644 --- a/arch/arm/mach-mx2/clock_imx21.c +++ b/arch/arm/mach-mx2/clock_imx21.c @@ -890,7 +890,7 @@ static struct clk clko_clk = { .con_id = n, \ .clk = &c, \ }, -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { /* It's unlikely that any driver wants one of them directly: _REGISTER_CLOCK(NULL, "ckih", ckih_clk) _REGISTER_CLOCK(NULL, "ckil", ckil_clk) diff --git a/arch/arm/mach-mx2/clock_imx27.c b/arch/arm/mach-mx2/clock_imx27.c index 3f7280c490f0..2c971442f3f2 100644 --- a/arch/arm/mach-mx2/clock_imx27.c +++ b/arch/arm/mach-mx2/clock_imx27.c @@ -621,7 +621,7 @@ DEFINE_CLOCK1(csi_clk, 0, 0, 0, parent, &csi_clk1, &per4_clk); .clk = &c, \ }, -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk) _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk) _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk) diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c index 53a112d4e04a..3c1e06f56dd6 100644 --- a/arch/arm/mach-mx3/clock-imx35.c +++ b/arch/arm/mach-mx3/clock-imx35.c @@ -404,7 +404,7 @@ DEFINE_CLOCK(gpu2d_clk, 0, CCM_CGR3, 4, NULL, NULL); .clk = &c, \ }, -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { _REGISTER_CLOCK(NULL, "asrc", asrc_clk) _REGISTER_CLOCK(NULL, "ata", ata_clk) _REGISTER_CLOCK(NULL, "audmux", audmux_clk) diff --git a/arch/arm/mach-mx3/clock.c b/arch/arm/mach-mx3/clock.c index 9957a11533a4..a68fcf981edf 100644 --- a/arch/arm/mach-mx3/clock.c +++ b/arch/arm/mach-mx3/clock.c @@ -516,7 +516,7 @@ DEFINE_CLOCK(ipg_clk, 0, NULL, 0, ipg_get_rate, NULL, &ahb_clk); .clk = &c, \ }, -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { _REGISTER_CLOCK(NULL, "emi", emi_clk) _REGISTER_CLOCK(NULL, "cspi", cspi1_clk) _REGISTER_CLOCK(NULL, "cspi", cspi2_clk) diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c index efc59c49341b..e4cef333e291 100644 --- a/arch/arm/mach-omap2/clock24xx.c +++ b/arch/arm/mach-omap2/clock24xx.c @@ -103,10 +103,10 @@ static struct omap_clk omap24xx_clks[] = { CLK(NULL, "mdm_ick", &mdm_ick, CK_243X), CLK(NULL, "mdm_osc_ck", &mdm_osc_ck, CK_243X), /* DSS domain clocks */ - CLK(NULL, "dss_ick", &dss_ick, CK_243X | CK_242X), - CLK(NULL, "dss1_fck", &dss1_fck, CK_243X | CK_242X), - CLK(NULL, "dss2_fck", &dss2_fck, CK_243X | CK_242X), - CLK(NULL, "dss_54m_fck", &dss_54m_fck, CK_243X | CK_242X), + CLK("omapfb", "ick", &dss_ick, CK_243X | CK_242X), + CLK("omapfb", "dss1_fck", &dss1_fck, CK_243X | CK_242X), + CLK("omapfb", "dss2_fck", &dss2_fck, CK_243X | CK_242X), + CLK("omapfb", "tv_fck", &dss_54m_fck, CK_243X | CK_242X), /* L3 domain clocks */ CLK(NULL, "core_l3_ck", &core_l3_ck, CK_243X | CK_242X), CLK(NULL, "ssi_fck", &ssi_ssr_sst_fck, CK_243X | CK_242X), @@ -206,7 +206,7 @@ static struct omap_clk omap24xx_clks[] = { CLK(NULL, "aes_ick", &aes_ick, CK_243X | CK_242X), CLK(NULL, "pka_ick", &pka_ick, CK_243X | CK_242X), CLK(NULL, "usb_fck", &usb_fck, CK_243X | CK_242X), - CLK(NULL, "usbhs_ick", &usbhs_ick, CK_243X), + CLK("musb_hdrc", "ick", &usbhs_ick, CK_243X), CLK("mmci-omap-hs.0", "ick", &mmchs1_ick, CK_243X), CLK("mmci-omap-hs.0", "fck", &mmchs1_fck, CK_243X), CLK("mmci-omap-hs.1", "ick", &mmchs2_ick, CK_243X), diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c index 0a14dca31e30..ba05aa42bd8e 100644 --- a/arch/arm/mach-omap2/clock34xx.c +++ b/arch/arm/mach-omap2/clock34xx.c @@ -157,7 +157,7 @@ static struct omap_clk omap34xx_clks[] = { CLK(NULL, "ssi_ssr_fck", &ssi_ssr_fck, CK_343X), CLK(NULL, "ssi_sst_fck", &ssi_sst_fck, CK_343X), CLK(NULL, "core_l3_ick", &core_l3_ick, CK_343X), - CLK(NULL, "hsotgusb_ick", &hsotgusb_ick, CK_343X), + CLK("musb_hdrc", "ick", &hsotgusb_ick, CK_343X), CLK(NULL, "sdrc_ick", &sdrc_ick, CK_343X), CLK(NULL, "gpmc_fck", &gpmc_fck, CK_343X), CLK(NULL, "security_l3_ick", &security_l3_ick, CK_343X), @@ -197,11 +197,11 @@ static struct omap_clk omap34xx_clks[] = { CLK("omap_rng", "ick", &rng_ick, CK_343X), CLK(NULL, "sha11_ick", &sha11_ick, CK_343X), CLK(NULL, "des1_ick", &des1_ick, CK_343X), - CLK(NULL, "dss1_alwon_fck", &dss1_alwon_fck, CK_343X), - CLK(NULL, "dss_tv_fck", &dss_tv_fck, CK_343X), - CLK(NULL, "dss_96m_fck", &dss_96m_fck, CK_343X), - CLK(NULL, "dss2_alwon_fck", &dss2_alwon_fck, CK_343X), - CLK(NULL, "dss_ick", &dss_ick, CK_343X), + CLK("omapfb", "dss1_fck", &dss1_alwon_fck, CK_343X), + CLK("omapfb", "tv_fck", &dss_tv_fck, CK_343X), + CLK("omapfb", "video_fck", &dss_96m_fck, CK_343X), + CLK("omapfb", "dss2_fck", &dss2_alwon_fck, CK_343X), + CLK("omapfb", "ick", &dss_ick, CK_343X), CLK(NULL, "cam_mclk", &cam_mclk, CK_343X), CLK(NULL, "cam_ick", &cam_ick, CK_343X), CLK(NULL, "csi2_96m_fck", &csi2_96m_fck, CK_343X), diff --git a/arch/arm/mach-omap2/clock34xx.h b/arch/arm/mach-omap2/clock34xx.h index 6763b8f73028..017a30e9aa1d 100644 --- a/arch/arm/mach-omap2/clock34xx.h +++ b/arch/arm/mach-omap2/clock34xx.h @@ -2182,7 +2182,7 @@ static struct clk wkup_32k_fck = { static struct clk gpio1_dbck = { .name = "gpio1_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &wkup_32k_fck, .enable_reg = OMAP_CM_REGADDR(WKUP_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO1_SHIFT, @@ -2427,7 +2427,7 @@ static struct clk per_32k_alwon_fck = { static struct clk gpio6_dbck = { .name = "gpio6_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &per_32k_alwon_fck, .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO6_SHIFT, @@ -2437,7 +2437,7 @@ static struct clk gpio6_dbck = { static struct clk gpio5_dbck = { .name = "gpio5_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &per_32k_alwon_fck, .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO5_SHIFT, @@ -2447,7 +2447,7 @@ static struct clk gpio5_dbck = { static struct clk gpio4_dbck = { .name = "gpio4_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &per_32k_alwon_fck, .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO4_SHIFT, @@ -2457,7 +2457,7 @@ static struct clk gpio4_dbck = { static struct clk gpio3_dbck = { .name = "gpio3_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &per_32k_alwon_fck, .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO3_SHIFT, @@ -2467,7 +2467,7 @@ static struct clk gpio3_dbck = { static struct clk gpio2_dbck = { .name = "gpio2_dbck", - .ops = &clkops_omap2_dflt_wait, + .ops = &clkops_omap2_dflt, .parent = &per_32k_alwon_fck, .enable_reg = OMAP_CM_REGADDR(OMAP3430_PER_MOD, CM_FCLKEN), .enable_bit = OMAP3430_EN_GPIO2_SHIFT, diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 496983ade97e..894cc355818a 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -354,10 +354,12 @@ static void omap_init_mcspi(void) platform_device_register(&omap2_mcspi1); platform_device_register(&omap2_mcspi2); #if defined(CONFIG_ARCH_OMAP2430) || defined(CONFIG_ARCH_OMAP3) - platform_device_register(&omap2_mcspi3); + if (cpu_is_omap2430() || cpu_is_omap343x()) + platform_device_register(&omap2_mcspi3); #endif #ifdef CONFIG_ARCH_OMAP3 - platform_device_register(&omap2_mcspi4); + if (cpu_is_omap343x()) + platform_device_register(&omap2_mcspi4); #endif } diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h index c6a7940f4287..9fd03a2ec95c 100644 --- a/arch/arm/mach-omap2/prm-regbits-34xx.h +++ b/arch/arm/mach-omap2/prm-regbits-34xx.h @@ -409,7 +409,7 @@ /* PM_PREPWSTST_CAM specific bits */ /* PM_PWSTCTRL_USBHOST specific bits */ -#define OMAP3430ES2_SAVEANDRESTORE_SHIFT (1 << 4) +#define OMAP3430ES2_SAVEANDRESTORE_SHIFT 4 /* RM_RSTST_PER specific bits */ diff --git a/arch/arm/mach-omap2/usb-tusb6010.c b/arch/arm/mach-omap2/usb-tusb6010.c index 8df55f40f4c0..8622c24cd270 100644 --- a/arch/arm/mach-omap2/usb-tusb6010.c +++ b/arch/arm/mach-omap2/usb-tusb6010.c @@ -187,7 +187,7 @@ int tusb6010_platform_retime(unsigned is_refclk) unsigned sysclk_ps; int status; - if (!refclk_psec || sysclk_ps == 0) + if (!refclk_psec || fclk_ps == 0) return -ENODEV; sysclk_ps = is_refclk ? refclk_psec : TUSB6010_OSCCLK_60; diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 6af99ddabdfb..b1c7778d9f96 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -188,6 +188,9 @@ static struct platform_device orion5x_eth = { .id = 0, .num_resources = 1, .resource = orion5x_eth_resources, + .dev = { + .coherent_dma_mask = 0xffffffff, + }, }; void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) @@ -248,12 +251,10 @@ static struct mv64xxx_i2c_pdata orion5x_i2c_pdata = { static struct resource orion5x_i2c_resources[] = { { - .name = "i2c base", .start = I2C_PHYS_BASE, .end = I2C_PHYS_BASE + 0x1f, .flags = IORESOURCE_MEM, }, { - .name = "i2c irq", .start = IRQ_ORION5X_I2C, .end = IRQ_ORION5X_I2C, .flags = IORESOURCE_IRQ, diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c index d245e59c51b1..29970f703f3c 100644 --- a/arch/arm/mach-pxa/devices.c +++ b/arch/arm/mach-pxa/devices.c @@ -72,7 +72,10 @@ void __init pxa_set_mci_info(struct pxamci_platform_data *info) } -static struct pxa2xx_udc_mach_info pxa_udc_info; +static struct pxa2xx_udc_mach_info pxa_udc_info = { + .gpio_pullup = -1, + .gpio_vbus = -1, +}; void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info) { diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c index 92ba16e1b6fc..7db966dc29ce 100644 --- a/arch/arm/mach-pxa/ezx.c +++ b/arch/arm/mach-pxa/ezx.c @@ -111,9 +111,9 @@ static unsigned long ezx_pin_config[] __initdata = { GPIO25_SSP1_TXD, GPIO26_SSP1_RXD, GPIO24_GPIO, /* pcap chip select */ - GPIO1_GPIO, /* pcap interrupt */ - GPIO4_GPIO, /* WDI_AP */ - GPIO55_GPIO, /* SYS_RESTART */ + GPIO1_GPIO | WAKEUP_ON_EDGE_RISE, /* pcap interrupt */ + GPIO4_GPIO | MFP_LPM_DRIVE_HIGH, /* WDI_AP */ + GPIO55_GPIO | MFP_LPM_DRIVE_HIGH, /* SYS_RESTART */ /* MMC */ GPIO32_MMC_CLK, @@ -144,20 +144,20 @@ static unsigned long ezx_pin_config[] __initdata = { #if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_E680) static unsigned long gen1_pin_config[] __initdata = { /* flip / lockswitch */ - GPIO12_GPIO, + GPIO12_GPIO | WAKEUP_ON_EDGE_BOTH, /* bluetooth (bcm2035) */ - GPIO14_GPIO | WAKEUP_ON_LEVEL_HIGH, /* HOSTWAKE */ + GPIO14_GPIO | WAKEUP_ON_EDGE_RISE, /* HOSTWAKE */ GPIO48_GPIO, /* RESET */ GPIO28_GPIO, /* WAKEUP */ /* Neptune handshake */ - GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH, /* BP_RDY */ - GPIO57_GPIO, /* AP_RDY */ - GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI */ - GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI2 */ - GPIO82_GPIO, /* RESET */ - GPIO99_GPIO, /* TC_MM_EN */ + GPIO0_GPIO | WAKEUP_ON_EDGE_FALL, /* BP_RDY */ + GPIO57_GPIO | MFP_LPM_DRIVE_HIGH, /* AP_RDY */ + GPIO13_GPIO | WAKEUP_ON_EDGE_BOTH, /* WDI */ + GPIO3_GPIO | WAKEUP_ON_EDGE_BOTH, /* WDI2 */ + GPIO82_GPIO | MFP_LPM_DRIVE_HIGH, /* RESET */ + GPIO99_GPIO | MFP_LPM_DRIVE_HIGH, /* TC_MM_EN */ /* sound */ GPIO52_SSP3_SCLK, @@ -199,21 +199,21 @@ static unsigned long gen1_pin_config[] __initdata = { defined(CONFIG_MACH_EZX_E2) || defined(CONFIG_MACH_EZX_E6) static unsigned long gen2_pin_config[] __initdata = { /* flip / lockswitch */ - GPIO15_GPIO, + GPIO15_GPIO | WAKEUP_ON_EDGE_BOTH, /* EOC */ - GPIO10_GPIO, + GPIO10_GPIO | WAKEUP_ON_EDGE_RISE, /* bluetooth (bcm2045) */ - GPIO13_GPIO | WAKEUP_ON_LEVEL_HIGH, /* HOSTWAKE */ + GPIO13_GPIO | WAKEUP_ON_EDGE_RISE, /* HOSTWAKE */ GPIO37_GPIO, /* RESET */ GPIO57_GPIO, /* WAKEUP */ /* Neptune handshake */ - GPIO0_GPIO | WAKEUP_ON_LEVEL_HIGH, /* BP_RDY */ - GPIO96_GPIO, /* AP_RDY */ - GPIO3_GPIO | WAKEUP_ON_LEVEL_HIGH, /* WDI */ - GPIO116_GPIO, /* RESET */ + GPIO0_GPIO | WAKEUP_ON_EDGE_FALL, /* BP_RDY */ + GPIO96_GPIO | MFP_LPM_DRIVE_HIGH, /* AP_RDY */ + GPIO3_GPIO | WAKEUP_ON_EDGE_FALL, /* WDI */ + GPIO116_GPIO | MFP_LPM_DRIVE_HIGH, /* RESET */ GPIO41_GPIO, /* BP_FLASH */ /* sound */ diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c index 2121309b2474..2b27336c29f1 100644 --- a/arch/arm/mach-pxa/imote2.c +++ b/arch/arm/mach-pxa/imote2.c @@ -412,7 +412,7 @@ static struct platform_device imote2_flash_device = { */ static struct i2c_board_info __initdata imote2_i2c_board_info[] = { { /* UCAM sensor board */ - .type = "max1238", + .type = "max1239", .addr = 0x35, }, { /* ITS400 Sensor board only */ .type = "max1363", diff --git a/arch/arm/mach-pxa/include/mach/reset.h b/arch/arm/mach-pxa/include/mach/reset.h index 31e6a7b6ad80..b6c10556fbc7 100644 --- a/arch/arm/mach-pxa/include/mach/reset.h +++ b/arch/arm/mach-pxa/include/mach/reset.h @@ -13,8 +13,9 @@ extern void clear_reset_status(unsigned int mask); /** * init_gpio_reset() - register GPIO as reset generator * @gpio: gpio nr - * @output: set gpio as out/low instead of input during normal work + * @output: set gpio as output instead of input during normal work + * @level: output level */ -extern int init_gpio_reset(int gpio, int output); +extern int init_gpio_reset(int gpio, int output, int level); #endif /* __ASM_ARCH_RESET_H */ diff --git a/arch/arm/mach-pxa/mfp-pxa2xx.c b/arch/arm/mach-pxa/mfp-pxa2xx.c index 7ffb91d64c39..cf6b720c055f 100644 --- a/arch/arm/mach-pxa/mfp-pxa2xx.c +++ b/arch/arm/mach-pxa/mfp-pxa2xx.c @@ -322,6 +322,7 @@ static inline void pxa27x_mfp_init(void) {} #ifdef CONFIG_PM static unsigned long saved_gafr[2][4]; static unsigned long saved_gpdr[4]; +static unsigned long saved_pgsr[4]; static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state) { @@ -332,6 +333,7 @@ static int pxa2xx_mfp_suspend(struct sys_device *d, pm_message_t state) saved_gafr[0][i] = GAFR_L(i); saved_gafr[1][i] = GAFR_U(i); saved_gpdr[i] = GPDR(i * 32); + saved_pgsr[i] = PGSR(i); GPDR(i * 32) = gpdr_lpm[i]; } @@ -346,6 +348,7 @@ static int pxa2xx_mfp_resume(struct sys_device *d) GAFR_L(i) = saved_gafr[0][i]; GAFR_U(i) = saved_gafr[1][i]; GPDR(i * 32) = saved_gpdr[i]; + PGSR(i) = saved_pgsr[i]; } PSSR = PSSR_RDH | PSSR_PH; return 0; @@ -374,6 +377,9 @@ static int __init pxa2xx_mfp_init(void) if (cpu_is_pxa27x()) pxa27x_mfp_init(); + /* clear RDH bit to enable GPIO receivers after reset/sleep exit */ + PSSR = PSSR_RDH; + /* initialize gafr_run[], pgsr_lpm[] from existing values */ for (i = 0; i <= gpio_to_bank(pxa_last_gpio); i++) gpdr_lpm[i] = GPDR(i * 32); diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c index 1cec1806f002..471a853e548b 100644 --- a/arch/arm/mach-pxa/palmld.c +++ b/arch/arm/mach-pxa/palmld.c @@ -62,6 +62,8 @@ static unsigned long palmld_pin_config[] __initdata = { GPIO29_AC97_SDATA_IN_0, GPIO30_AC97_SDATA_OUT, GPIO31_AC97_SYNC, + GPIO89_AC97_SYSCLK, + GPIO95_AC97_nRESET, /* IrDA */ GPIO108_GPIO, /* ir disable */ diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c index 30662363907b..05bf979b78a6 100644 --- a/arch/arm/mach-pxa/palmt5.c +++ b/arch/arm/mach-pxa/palmt5.c @@ -64,6 +64,7 @@ static unsigned long palmt5_pin_config[] __initdata = { GPIO29_AC97_SDATA_IN_0, GPIO30_AC97_SDATA_OUT, GPIO31_AC97_SYNC, + GPIO89_AC97_SYSCLK, GPIO95_AC97_nRESET, /* IrDA */ diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c index e2d44b1a8a9b..e99a893c58a7 100644 --- a/arch/arm/mach-pxa/palmtx.c +++ b/arch/arm/mach-pxa/palmtx.c @@ -65,6 +65,7 @@ static unsigned long palmtx_pin_config[] __initdata = { GPIO29_AC97_SDATA_IN_0, GPIO30_AC97_SDATA_OUT, GPIO31_AC97_SYNC, + GPIO89_AC97_SYSCLK, GPIO95_AC97_nRESET, /* IrDA */ diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c index df29d45fb4e7..01e9d643394a 100644 --- a/arch/arm/mach-pxa/reset.c +++ b/arch/arm/mach-pxa/reset.c @@ -20,7 +20,7 @@ static void do_hw_reset(void); static int reset_gpio = -1; -int init_gpio_reset(int gpio, int output) +int init_gpio_reset(int gpio, int output, int level) { int rc; @@ -31,7 +31,7 @@ int init_gpio_reset(int gpio, int output) } if (output) - rc = gpio_direction_output(gpio, 0); + rc = gpio_direction_output(gpio, level); else rc = gpio_direction_input(gpio); if (rc) { diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index c18e34acafcb..5a45fe340a10 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -531,9 +531,15 @@ static int spitz_ohci_init(struct device *dev) return gpio_direction_output(SPITZ_GPIO_USB_HOST, 1); } +static void spitz_ohci_exit(struct device *dev) +{ + gpio_free(SPITZ_GPIO_USB_HOST); +} + static struct pxaohci_platform_data spitz_ohci_platform_data = { .port_mode = PMM_NPS_MODE, .init = spitz_ohci_init, + .exit = spitz_ohci_exit, .flags = ENABLE_PORT_ALL | NO_OC_PROTECTION, .power_budget = 150, }; @@ -731,7 +737,7 @@ static void spitz_restart(char mode, const char *cmd) static void __init common_init(void) { - init_gpio_reset(SPITZ_GPIO_ON_RESET, 1); + init_gpio_reset(SPITZ_GPIO_ON_RESET, 1, 0); pm_power_off = spitz_poweroff; arm_pm_restart = spitz_restart; diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index afac5b6d3d78..a0bd46ef5d30 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -897,7 +897,7 @@ static void __init tosa_init(void) gpio_set_wake(MFP_PIN_GPIO1, 1); /* We can't pass to gpio-keys since it will drop the Reset altfunc */ - init_gpio_reset(TOSA_GPIO_ON_RESET, 0); + init_gpio_reset(TOSA_GPIO_ON_RESET, 0, 0); pm_power_off = tosa_poweroff; arm_pm_restart = tosa_restart; diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index 0e65344e9f53..dd031cc41847 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -46,6 +46,7 @@ #include <mach/audio.h> #include <mach/pxafb.h> #include <mach/i2c.h> +#include <mach/regs-uart.h> #include <mach/viper.h> #include <asm/setup.h> diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 942e1a7eb9b2..076acbc50706 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -750,14 +750,6 @@ void __init realview_timer_init(unsigned int timer_irq) { u32 val; -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST - /* - * The dummy clock device has to be registered before the main device - * so that the latter will broadcast the clock events - */ - local_timer_setup(); -#endif - /* * set clock frequency: * REALVIEW_REFCLK is 32KHz diff --git a/arch/arm/mach-realview/include/mach/smp.h b/arch/arm/mach-realview/include/mach/smp.h index 515819efd046..dd53892d44a7 100644 --- a/arch/arm/mach-realview/include/mach/smp.h +++ b/arch/arm/mach-realview/include/mach/smp.h @@ -15,16 +15,9 @@ /* * We use IRQ1 as the IPI */ -static inline void smp_cross_call(cpumask_t callmap) -{ - gic_raise_softirq(callmap, 1); -} - -/* - * Do nothing on MPcore. - */ -static inline void smp_cross_call_done(cpumask_t callmap) +static inline void smp_cross_call(const struct cpumask *mask) { + gic_raise_softirq(mask, 1); } #endif diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c index d0d39adf6407..1c01d13460f0 100644 --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@ -189,8 +189,10 @@ void __cpuinit local_timer_setup(void) struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); clk->name = "dummy_timer"; - clk->features = CLOCK_EVT_FEAT_DUMMY; - clk->rating = 200; + clk->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_DUMMY; + clk->rating = 400; clk->mult = 1; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index ea3c75595fa9..30a9c68591f6 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -78,13 +78,6 @@ void __cpuinit platform_secondary_init(unsigned int cpu) trace_hardirqs_off(); /* - * the primary core may have used a "cross call" soft interrupt - * to get this processor out of WFI in the BootMonitor - make - * sure that we are no longer being sent this soft interrupt - */ - smp_cross_call_done(cpumask_of_cpu(cpu)); - - /* * if any interrupts are already enabled for the primary * core (e.g. timer irq), then they will not have been enabled * for us: do so @@ -136,7 +129,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * Use smp_cross_call() for this, since there's little * point duplicating the code here */ - smp_cross_call(cpumask_of_cpu(cpu)); + smp_cross_call(cpumask_of(cpu)); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -224,11 +217,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (max_cpus > ncores) max_cpus = ncores; -#ifdef CONFIG_LOCAL_TIMERS +#if defined(CONFIG_LOCAL_TIMERS) || defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) /* - * Enable the local timer for primary CPU. If the device is - * dummy (!CONFIG_LOCAL_TIMERS), it was already registers in - * realview_timer_init + * Enable the local timer or broadcast device for the boot CPU. */ local_timer_setup(); #endif diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 4389c160f7d0..8637dea5e150 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -588,8 +588,6 @@ static void __init bast_map_io(void) s3c_device_nand.dev.platform_data = &bast_nand_info; - s3c_i2c0_set_platdata(&bast_i2c_info); - s3c24xx_init_io(bast_iodesc, ARRAY_SIZE(bast_iodesc)); s3c24xx_init_clocks(0); s3c24xx_init_uarts(bast_uartcfgs, ARRAY_SIZE(bast_uartcfgs)); @@ -602,6 +600,7 @@ static void __init bast_init(void) sysdev_class_register(&bast_pm_sysclass); sysdev_register(&bast_pm_sysdev); + s3c_i2c0_set_platdata(&bast_i2c_info); s3c24xx_fb_set_platdata(&bast_fb_info); platform_add_devices(bast_devices, ARRAY_SIZE(bast_devices)); diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 1f929c391af7..b3bebcc5623b 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -413,7 +413,7 @@ static struct clk ref24_clk = { .rate = 24000000, }; -static struct clk_lookup lookups[] __initdata = { +static struct clk_lookup lookups[] = { { /* UART0 */ .dev_id = "dev:f1", .clk = &ref24_clk, diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3397f1e64d76..a08d9d2380d3 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -184,23 +184,37 @@ __v7_setup: stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all ldmia r12, {r0-r5, r7, r9, r11, lr} + + mrc p15, 0, r0, c0, c0, 0 @ read main ID register + and r10, r0, #0xff000000 @ ARM? + teq r10, #0x41000000 + bne 2f + and r5, r0, #0x00f00000 @ variant + and r6, r0, #0x0000000f @ revision + orr r0, r6, r5, lsr #20-4 @ combine variant and revision + #ifdef CONFIG_ARM_ERRATA_430973 - mrc p15, 0, r10, c1, c0, 1 @ read aux control register - orr r10, r10, #(1 << 6) @ set IBE to 1 - mcr p15, 0, r10, c1, c0, 1 @ write aux control register + teq r5, #0x00100000 @ only present in r1p* + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_458693 - mrc p15, 0, r10, c1, c0, 1 @ read aux control register - orr r10, r10, #(1 << 5) @ set L1NEON to 1 - orr r10, r10, #(1 << 9) @ set PLDNOP to 1 - mcr p15, 0, r10, c1, c0, 1 @ write aux control register + teq r0, #0x20 @ only present in r2p0 + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 5) @ set L1NEON to 1 + orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_460075 - mrc p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register - orr r10, r10, #(1 << 22) @ set the Write Allocate disable bit - mcr p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register + teq r0, #0x20 @ only present in r2p0 + mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r10, #1 << 22 + orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register #endif - mov r10, #0 + +2: mov r10, #0 #ifdef HARVARD_CACHE mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #endif diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h index 386cbd13eaf4..d3a6f9298e9e 100644 --- a/arch/arm/nwfpe/fpa11.h +++ b/arch/arm/nwfpe/fpa11.h @@ -114,4 +114,8 @@ extern unsigned int SingleCPDO(struct roundingData *roundData, extern unsigned int DoubleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd); +/* extneded_cpdo.c */ +extern unsigned int ExtendedCPDO(struct roundingData *roundData, + const unsigned int opcode, FPREG * rFd); + #endif diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c index 9843dc533047..31c4eeec18b0 100644 --- a/arch/arm/nwfpe/fpa11_cprt.c +++ b/arch/arm/nwfpe/fpa11_cprt.c @@ -27,10 +27,6 @@ #include "fpmodule.inl" #include "softfloat.h" -#ifdef CONFIG_FPE_NWFPE_XP -extern flag floatx80_is_nan(floatx80); -#endif - unsigned int PerformFLT(const unsigned int opcode); unsigned int PerformFIX(const unsigned int opcode); diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h index 260fe29d73f5..13e479c5da57 100644 --- a/arch/arm/nwfpe/softfloat.h +++ b/arch/arm/nwfpe/softfloat.h @@ -226,6 +226,8 @@ char floatx80_le_quiet( floatx80, floatx80 ); char floatx80_lt_quiet( floatx80, floatx80 ); char floatx80_is_signaling_nan( floatx80 ); +extern flag floatx80_is_nan(floatx80); + #endif static inline flag extractFloat32Sign(float32 a) diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c index ce6b4baeedec..3746222bed10 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/plat-omap/fb.c @@ -206,9 +206,10 @@ void __init omapfb_reserve_sdram(void) config_invalid = 1; return; } - if (rg.paddr) + if (rg.paddr) { reserve_bootmem(rg.paddr, rg.size, BOOTMEM_DEFAULT); - reserved += rg.size; + reserved += rg.size; + } omapfb_config.mem_desc.region[i] = rg; configured_regions++; } diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 17d7afe42b83..ee0b21f5b094 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -307,7 +307,7 @@ static inline int gpio_valid(int gpio) return 0; if (cpu_is_omap24xx() && gpio < 128) return 0; - if (cpu_is_omap34xx() && gpio < 160) + if (cpu_is_omap34xx() && gpio < 192) return 0; return -1; } diff --git a/arch/arm/plat-s3c/clock.c b/arch/arm/plat-s3c/clock.c index b6be76e2fe51..4d01ef1a25dd 100644 --- a/arch/arm/plat-s3c/clock.c +++ b/arch/arm/plat-s3c/clock.c @@ -306,8 +306,6 @@ struct clk s3c24xx_uclk = { int s3c24xx_register_clock(struct clk *clk) { - clk->owner = THIS_MODULE; - if (clk->enable == NULL) clk->enable = clk_null_enable; diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index aee2aeb46c60..07326f632361 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -1235,7 +1235,7 @@ int s3c2410_dma_getposition(unsigned int channel, dma_addr_t *src, dma_addr_t *d EXPORT_SYMBOL(s3c2410_dma_getposition); -static struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev) +static inline struct s3c2410_dma_chan *to_dma_chan(struct sys_device *dev) { return container_of(dev, struct s3c2410_dma_chan, dev); } diff --git a/arch/arm/plat-s3c64xx/gpiolib.c b/arch/arm/plat-s3c64xx/gpiolib.c index ee9188add8fb..78ee52cffc9e 100644 --- a/arch/arm/plat-s3c64xx/gpiolib.c +++ b/arch/arm/plat-s3c64xx/gpiolib.c @@ -57,7 +57,7 @@ #if 1 #define gpio_dbg(x...) do { } while(0) #else -#define gpio_dbg(x...) printk(KERN_DEBUG ## x) +#define gpio_dbg(x...) printk(KERN_DEBUG x) #endif /* The s3c64xx_gpiolib_4bit routines are to control the gpio banks where diff --git a/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h b/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h index 81549516572f..2ba1767512d7 100644 --- a/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h +++ b/arch/arm/plat-s3c64xx/include/plat/gpio-bank-h.h @@ -61,14 +61,14 @@ #define S3C64XX_GPH7_ADDR_CF1 (0x06 << 28) #define S3C64XX_GPH7_EINT_G6_7 (0x07 << 28) -#define S3C64XX_GPH8_MMC1_DATA6 (0x02 << 32) -#define S3C64XX_GPH8_MMC2_DATA2 (0x03 << 32) -#define S3C64XX_GPH8_I2S_V40_LRCLK (0x05 << 32) -#define S3C64XX_GPH8_ADDR_CF2 (0x06 << 32) -#define S3C64XX_GPH8_EINT_G6_8 (0x07 << 32) - -#define S3C64XX_GPH9_MMC1_DATA7 (0x02 << 36) -#define S3C64XX_GPH9_MMC2_DATA3 (0x03 << 36) -#define S3C64XX_GPH9_I2S_V40_DI (0x05 << 36) -#define S3C64XX_GPH9_EINT_G6_9 (0x07 << 36) +#define S3C64XX_GPH8_MMC1_DATA6 (0x02 << 0) +#define S3C64XX_GPH8_MMC2_DATA2 (0x03 << 0) +#define S3C64XX_GPH8_I2S_V40_LRCLK (0x05 << 0) +#define S3C64XX_GPH8_ADDR_CF2 (0x06 << 0) +#define S3C64XX_GPH8_EINT_G6_8 (0x07 << 0) +#define S3C64XX_GPH9_OUTPUT (0x01 << 4) +#define S3C64XX_GPH9_MMC1_DATA7 (0x02 << 4) +#define S3C64XX_GPH9_MMC2_DATA3 (0x03 << 4) +#define S3C64XX_GPH9_I2S_V40_DI (0x05 << 4) +#define S3C64XX_GPH9_EINT_G6_9 (0x07 << 4) diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 945e0d237a1d..fec64678a63a 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Mon Mar 23 20:09:01 2009 +# Last update: Fri May 29 10:14:20 2009 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -916,7 +916,7 @@ nxdb500 MACH_NXDB500 NXDB500 905 apf9328 MACH_APF9328 APF9328 906 omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907 omap_twip MACH_OMAP_TWIP OMAP_TWIP 908 -palmt650 MACH_PALMT650 PALMT650 909 +treo650 MACH_TREO650 TREO650 909 acumen MACH_ACUMEN ACUMEN 910 xp100 MACH_XP100 XP100 911 fs2410 MACH_FS2410 FS2410 912 @@ -1232,7 +1232,7 @@ ql202b MACH_QL202B QL202B 1226 vpac270 MACH_VPAC270 VPAC270 1227 rd129 MACH_RD129 RD129 1228 htcwizard MACH_HTCWIZARD HTCWIZARD 1229 -xscale_treo680 MACH_XSCALE_TREO680 XSCALE_TREO680 1230 +treo680 MACH_TREO680 TREO680 1230 tecon_tmezon MACH_TECON_TMEZON TECON_TMEZON 1231 zylonite MACH_ZYLONITE ZYLONITE 1233 gene1270 MACH_GENE1270 GENE1270 1234 @@ -1418,10 +1418,10 @@ looxc550 MACH_LOOXC550 LOOXC550 1417 cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418 app3xx MACH_APP3XX APP3XX 1419 sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420 -palmtreo700p MACH_PALMTREO700P PALMTREO700P 1421 -palmtreo700w MACH_PALMTREO700W PALMTREO700W 1422 -palmtreo750 MACH_PALMTREO750 PALMTREO750 1423 -palmtreo755p MACH_PALMTREO755P PALMTREO755P 1424 +treo700p MACH_TREO700P TREO700P 1421 +treo700w MACH_TREO700W TREO700W 1422 +treo750 MACH_TREO750 TREO750 1423 +treo755p MACH_TREO755P TREO755P 1424 ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425 sarge MACH_SARGE SARGE 1426 a696 MACH_A696 A696 1427 @@ -1721,7 +1721,7 @@ sapphire MACH_SAPPHIRE SAPPHIRE 1729 csb637xo MACH_CSB637XO CSB637XO 1730 evisiong MACH_EVISIONG EVISIONG 1731 stmp37xx MACH_STMP37XX STMP37XX 1732 -stmp378x MACH_STMP38XX STMP38XX 1733 +stmp378x MACH_STMP378X STMP378X 1733 tnt MACH_TNT TNT 1734 tbxt MACH_TBXT TBXT 1735 playmate MACH_PLAYMATE PLAYMATE 1736 @@ -1817,7 +1817,7 @@ smdkc100 MACH_SMDKC100 SMDKC100 1826 tavorevb MACH_TAVOREVB TAVOREVB 1827 saar MACH_SAAR SAAR 1828 deister_eyecam MACH_DEISTER_EYECAM DEISTER_EYECAM 1829 -at91sam9m10ek MACH_AT91SAM9M10EK AT91SAM9M10EK 1830 +at91sam9m10g45ek MACH_AT91SAM9M10G45EK AT91SAM9M10G45EK 1830 linkstation_produo MACH_LINKSTATION_PRODUO LINKSTATION_PRODUO 1831 hit_b0 MACH_HIT_B0 HIT_B0 1832 adx_rmu MACH_ADX_RMU ADX_RMU 1833 @@ -2132,3 +2132,116 @@ apollo MACH_APOLLO APOLLO 2141 at91cap9stk MACH_AT91CAP9STK AT91CAP9STK 2142 spc300 MACH_SPC300 SPC300 2143 eko MACH_EKO EKO 2144 +ccw9m2443 MACH_CCW9M2443 CCW9M2443 2145 +ccw9m2443js MACH_CCW9M2443JS CCW9M2443JS 2146 +m2m_router_device MACH_M2M_ROUTER_DEVICE M2M_ROUTER_DEVICE 2147 +str9104nas MACH_STAR9104NAS STAR9104NAS 2148 +pca100 MACH_PCA100 PCA100 2149 +z3_dm365_mod_01 MACH_Z3_DM365_MOD_01 Z3_DM365_MOD_01 2150 +hipox MACH_HIPOX HIPOX 2151 +omap3_piteds MACH_OMAP3_PITEDS OMAP3_PITEDS 2152 +bm150r MACH_BM150R BM150R 2153 +tbone MACH_TBONE TBONE 2154 +merlin MACH_MERLIN MERLIN 2155 +falcon MACH_FALCON FALCON 2156 +davinci_da850_evm MACH_DAVINCI_DA850_EVM DAVINCI_DA850_EVM 2157 +s5p6440 MACH_S5P6440 S5P6440 2158 +at91sam9g10ek MACH_AT91SAM9G10EK AT91SAM9G10EK 2159 +omap_4430sdp MACH_OMAP_4430SDP OMAP_4430SDP 2160 +lpc313x MACH_LPC313X LPC313X 2161 +magx_zn5 MACH_MAGX_ZN5 MAGX_ZN5 2162 +magx_em30 MACH_MAGX_EM30 MAGX_EM30 2163 +magx_ve66 MACH_MAGX_VE66 MAGX_VE66 2164 +meesc MACH_MEESC MEESC 2165 +otc570 MACH_OTC570 OTC570 2166 +bcu2412 MACH_BCU2412 BCU2412 2167 +beacon MACH_BEACON BEACON 2168 +actia_tgw MACH_ACTIA_TGW ACTIA_TGW 2169 +e4430 MACH_E4430 E4430 2170 +ql300 MACH_QL300 QL300 2171 +btmavb101 MACH_BTMAVB101 BTMAVB101 2172 +btmawb101 MACH_BTMAWB101 BTMAWB101 2173 +sq201 MACH_SQ201 SQ201 2174 +quatro45xx MACH_QUATRO45XX QUATRO45XX 2175 +openpad MACH_OPENPAD OPENPAD 2176 +tx25 MACH_TX25 TX25 2177 +omap3_torpedo MACH_OMAP3_TORPEDO OMAP3_TORPEDO 2178 +htcraphael_k MACH_HTCRAPHAEL_K HTCRAPHAEL_K 2179 +lal43 MACH_LAL43 LAL43 2181 +htcraphael_cdma500 MACH_HTCRAPHAEL_CDMA500 HTCRAPHAEL_CDMA500 2182 +anw6410 MACH_ANW6410 ANW6410 2183 +htcprophet MACH_HTCPROPHET HTCPROPHET 2185 +cfa_10022 MACH_CFA_10022 CFA_10022 2186 +imx27_visstrim_m10 MACH_IMX27_VISSTRIM_M10 IMX27_VISSTRIM_M10 2187 +px2imx27 MACH_PX2IMX27 PX2IMX27 2188 +stm3210e_eval MACH_STM3210E_EVAL STM3210E_EVAL 2189 +dvs10 MACH_DVS10 DVS10 2190 +portuxg20 MACH_PORTUXG20 PORTUXG20 2191 +arm_spv MACH_ARM_SPV ARM_SPV 2192 +smdkc110 MACH_SMDKC110 SMDKC110 2193 +cabespresso MACH_CABESPRESSO CABESPRESSO 2194 +hmc800 MACH_HMC800 HMC800 2195 +sholes MACH_SHOLES SHOLES 2196 +btmxc31 MACH_BTMXC31 BTMXC31 2197 +dt501 MACH_DT501 DT501 2198 +ktx MACH_KTX KTX 2199 +omap3517evm MACH_OMAP3517EVM OMAP3517EVM 2200 +netspace_v2 MACH_NETSPACE_V2 NETSPACE_V2 2201 +netspace_max_v2 MACH_NETSPACE_MAX_V2 NETSPACE_MAX_V2 2202 +d2net_v2 MACH_D2NET_V2 D2NET_V2 2203 +net2big_v2 MACH_NET2BIG_V2 NET2BIG_V2 2204 +net4big_v2 MACH_NET4BIG_V2 NET4BIG_V2 2205 +net5big_v2 MACH_NET5BIG_V2 NET5BIG_V2 2206 +endb2443 MACH_ENDB2443 ENDB2443 2207 +inetspace_v2 MACH_INETSPACE_V2 INETSPACE_V2 2208 +tros MACH_TROS TROS 2209 +pelco_homer MACH_PELCO_HOMER PELCO_HOMER 2210 +ofsp8 MACH_OFSP8 OFSP8 2211 +at91sam9g45ekes MACH_AT91SAM9G45EKES AT91SAM9G45EKES 2212 +guf_cupid MACH_GUF_CUPID GUF_CUPID 2213 +eab1r MACH_EAB1R EAB1R 2214 +desirec MACH_DESIREC DESIREC 2215 +cordoba MACH_CORDOBA CORDOBA 2216 +irvine MACH_IRVINE IRVINE 2217 +sff772 MACH_SFF772 SFF772 2218 +pelco_milano MACH_PELCO_MILANO PELCO_MILANO 2219 +pc7302 MACH_PC7302 PC7302 2220 +bip6000 MACH_BIP6000 BIP6000 2221 +silvermoon MACH_SILVERMOON SILVERMOON 2222 +vc0830 MACH_VC0830 VC0830 2223 +dt430 MACH_DT430 DT430 2224 +ji42pf MACH_JI42PF JI42PF 2225 +gnet_ksm MACH_GNET_KSM GNET_KSM 2226 +gnet_sgm MACH_GNET_SGM GNET_SGM 2227 +gnet_sgr MACH_GNET_SGR GNET_SGR 2228 +omap3_icetekevm MACH_OMAP3_ICETEKEVM OMAP3_ICETEKEVM 2229 +pnp MACH_PNP PNP 2230 +ctera_2bay_k MACH_CTERA_2BAY_K CTERA_2BAY_K 2231 +ctera_2bay_u MACH_CTERA_2BAY_U CTERA_2BAY_U 2232 +sas_c MACH_SAS_C SAS_C 2233 +vma2315 MACH_VMA2315 VMA2315 2234 +vcs MACH_VCS VCS 2235 +spear600 MACH_SPEAR600 SPEAR600 2236 +spear300 MACH_SPEAR300 SPEAR300 2237 +spear1300 MACH_SPEAR1300 SPEAR1300 2238 +lilly1131 MACH_LILLY1131 LILLY1131 2239 +arvoo_ax301 MACH_ARVOO_AX301 ARVOO_AX301 2240 +mapphone MACH_MAPPHONE MAPPHONE 2241 +legend MACH_LEGEND LEGEND 2242 +salsa MACH_SALSA SALSA 2243 +lounge MACH_LOUNGE LOUNGE 2244 +vision MACH_VISION VISION 2245 +vmb20 MACH_VMB20 VMB20 2246 +hy2410 MACH_HY2410 HY2410 2247 +hy9315 MACH_HY9315 HY9315 2248 +bullwinkle MACH_BULLWINKLE BULLWINKLE 2249 +arm_ultimator2 MACH_ARM_ULTIMATOR2 ARM_ULTIMATOR2 2250 +vs_v210 MACH_VS_V210 VS_V210 2252 +vs_v212 MACH_VS_V212 VS_V212 2253 +hmt MACH_HMT HMT 2254 +suen3 MACH_SUEN3 SUEN3 2255 +vesper MACH_VESPER VESPER 2256 +str9 MACH_STR9 STR9 2257 +omap3_wl_ff MACH_OMAP3_WL_FF OMAP3_WL_FF 2258 +simcom MACH_SIMCOM SIMCOM 2259 +mcwebio MACH_MCWEBIO MCWEBIO 2260 diff --git a/arch/blackfin/include/asm/.gitignore b/arch/blackfin/include/asm/.gitignore deleted file mode 100644 index 7858564a4466..000000000000 --- a/arch/blackfin/include/asm/.gitignore +++ /dev/null @@ -1 +0,0 @@ -+mach diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h index e70074e05f4e..733a178d782d 100644 --- a/arch/blackfin/include/asm/flat.h +++ b/arch/blackfin/include/asm/flat.h @@ -10,7 +10,6 @@ #include <asm/unaligned.h> -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index 1e57b636e0bc..cf5066d3efd2 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -378,8 +378,10 @@ #define __NR_dup3 363 #define __NR_pipe2 364 #define __NR_inotify_init1 365 +#define __NR_preadv 366 +#define __NR_pwritev 367 -#define __NR_syscall 366 +#define __NR_syscall 368 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ diff --git a/arch/blackfin/kernel/.gitignore b/arch/blackfin/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/blackfin/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/blackfin/lib/strncmp.c b/arch/blackfin/lib/strncmp.c index 2aaae78a68e0..46518b1d2983 100644 --- a/arch/blackfin/lib/strncmp.c +++ b/arch/blackfin/lib/strncmp.c @@ -8,9 +8,8 @@ #define strncmp __inline_strncmp #include <asm/string.h> -#undef strncmp - #include <linux/module.h> +#undef strncmp int strncmp(const char *cs, const char *ct, size_t count) { diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 21e65a339a22..a063a434f7e3 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1581,6 +1581,8 @@ ENTRY(_sys_call_table) .long _sys_dup3 .long _sys_pipe2 .long _sys_inotify_init1 /* 365 */ + .long _sys_preadv + .long _sys_pwritev .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index df3925cb1c7f..d70b445f4a8f 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq) { } -void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) +int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) { unsigned long flags; spin_lock_irqsave(&irq_lock, flags); irq_allocations[irq - FIRST_IRQ].mask = *dest; spin_unlock_irqrestore(&irq_lock, flags); + + return 0; } static struct irq_chip crisv32_irq_type = { diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h index 2a873508a9a1..bd12b31b90e6 100644 --- a/arch/h8300/include/asm/flat.h +++ b/arch/h8300/include/asm/flat.h @@ -5,7 +5,6 @@ #ifndef __H8300_FLAT_H__ #define __H8300_FLAT_H__ -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) 1 #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index cc0a3182db3c..acb5047ab573 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq) { } -static void +static int hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) { + return 0; } static struct hw_interrupt_type irq_type_hp_sim = { diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5510317db37b..baec6f00f7f3 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) return gsi; @@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) fadt = (struct acpi_table_fadt *)fadt_header; - acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); + acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); return 0; } diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 166e0d839fa0..f92cef47bf86 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -329,7 +329,7 @@ unmask_irq (unsigned int irq) } -static void +static int iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) { #ifdef CONFIG_SMP @@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) cpu = cpumask_first_and(cpu_online_mask, mask); if (cpu >= nr_cpu_ids) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt */ + return -1; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } + #endif + return 0; } /* diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 2b15e233f7fe..0f8ade9331ba 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -12,7 +12,7 @@ static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, +static int ia64_set_msi_irq_affinity(unsigned int irq, const cpumask_t *cpu_mask) { struct msi_msg msg; @@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, int cpu = first_cpu(*cpu_mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; read_msi_msg(irq, &msg); @@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); + + return 0; } #endif /* CONFIG_SMP */ @@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; int cpu = cpumask_first(mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dmar_msi_read(irq, &msg); @@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dmar_msi_write(irq, &msg); cpumask_copy(irq_desc[irq].affinity, mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 66fd705e82c0..764f26abac05 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -227,7 +227,7 @@ finish_up: return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) +static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; @@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) (void)sn_retarget_vector(sn_irq_info, nasid, slice); + + return 0; } #ifdef CONFIG_SMP diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 81e428943d73..fbbfb9701201 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, +static int sn_set_msi_irq_affinity(unsigned int irq, const struct cpumask *cpu_mask) { struct msi_msg msg; @@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpu = cpumask_first(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) - return; + return -1; /* * Release XIO resources for the old MSI PCI address @@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); sn_msi_info[irq].sn_irq_info = new_irq_info; if (new_irq_info == NULL) - return; + return -1; /* * Map the xio address into bus space @@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpu_mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h index d851cf0c4aa5..5d711c4688fb 100644 --- a/arch/m32r/include/asm/flat.h +++ b/arch/m32r/include/asm/flat.h @@ -12,7 +12,6 @@ #ifndef __ASM_M32R_FLAT_H #define __ASM_M32R_FLAT_H -#define flat_stack_align(sp) (*sp += (*sp & 3 ? (4 - (*sp & 3)): 0)) #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) #define flat_set_persistent(relval, p) 0 diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index 814b5174a8e0..a0e290793978 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -5,7 +5,6 @@ #ifndef __M68KNOMMU_FLAT_H__ #define __M68KNOMMU_FLAT_H__ -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index beb7ecd72793..4ef6af0a8f31 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Tue Mar 24 10:23:20 2009 +# Linux kernel version: 2.6.30-rc5 +# Mon May 11 09:01:02 2009 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -32,6 +32,7 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y # CONFIG_TASKSTATS is not set @@ -63,6 +64,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -80,6 +82,8 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -92,7 +96,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -166,6 +169,8 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_NOMMU_INITIAL_TRIM_EXCESS=1 # # Exectuable file formats @@ -180,7 +185,6 @@ CONFIG_NET=y # # Networking options # -CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y @@ -232,6 +236,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set # CONFIG_NET_SCHED is not set # CONFIG_DCB is not set @@ -244,7 +249,6 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set CONFIG_WIRELESS_OLD_REGULATORY=y @@ -379,6 +383,7 @@ CONFIG_MISC_DEVICES=y # CONFIG_ATA is not set # CONFIG_MD is not set CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -388,6 +393,7 @@ CONFIG_NETDEVICES=y # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y # CONFIG_MII is not set +# CONFIG_ETHOC is not set # CONFIG_DNET is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set @@ -405,7 +411,6 @@ CONFIG_NETDEV_10000=y # # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set -# CONFIG_IWLWIFI_LEDS is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers @@ -455,6 +460,7 @@ CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_RTC is not set # CONFIG_GEN_RTC is not set # CONFIG_R3964 is not set @@ -525,7 +531,7 @@ CONFIG_USB_SUPPORT=y # # -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # CONFIG_USB_GADGET is not set @@ -538,6 +544,7 @@ CONFIG_USB_SUPPORT=y # CONFIG_ACCESSIBILITY is not set # CONFIG_RTC_CLASS is not set # CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set @@ -563,6 +570,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -601,8 +613,13 @@ CONFIG_CRAMFS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -614,7 +631,6 @@ CONFIG_LOCKD_V4=y CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y -# CONFIG_SUNRPC_REGISTER_V4 is not set # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -647,6 +663,9 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_SOFTLOCKUP=y CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y @@ -678,15 +697,8 @@ CONFIG_DEBUG_SG=y # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_FAULT_INJECTION is not set CONFIG_SYSCTL_SYSCALL_CHECK=y - -# -# Tracers -# -# CONFIG_SCHED_TRACER is not set -# CONFIG_CONTEXT_SWITCH_TRACER is not set -# CONFIG_BOOT_TRACER is not set -# CONFIG_TRACE_BRANCH_PROFILING is not set -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_PAGE_POISONING is not set +# CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_EARLY_PRINTK=y CONFIG_HEART_BEAT=y @@ -777,6 +789,7 @@ CONFIG_CRYPTO=y # Compression # # CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set # @@ -784,6 +797,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines @@ -797,8 +811,8 @@ CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y -CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c index a69d3e3c2fd4..b15605299a57 100644 --- a/arch/microblaze/kernel/intc.c +++ b/arch/microblaze/kernel/intc.c @@ -137,8 +137,8 @@ void __init init_IRQ(void) intr_type = *(int *) of_get_property(intc, "xlnx,kind-of-intr", NULL); - if (intr_type >= (1 << nr_irq)) - printk(KERN_INFO " ERROR: Mishmash in king-of-intr param\n"); + if (intr_type >= (1 << (nr_irq + 1))) + printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n"); #ifdef CONFIG_SELFMOD_INTC selfmod_function((int *) arr_func, intc_baseaddr); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 09b1287a92ce..25f3b0a11ca8 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -72,6 +72,7 @@ config MIPS_COBALT select IRQ_CPU select IRQ_GT641XX select PCI_GT64XXX_PCI0 + select PCI select SYS_HAS_CPU_NEVADA select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL @@ -593,7 +594,7 @@ config WR_PPMC board, which is based on GT64120 bridge chip. config CAVIUM_OCTEON_SIMULATOR - bool "Support for the Cavium Networks Octeon Simulator" + bool "Cavium Networks Octeon Simulator" select CEVT_R4K select 64BIT_PHYS_ADDR select DMA_COHERENT @@ -607,7 +608,7 @@ config CAVIUM_OCTEON_SIMULATOR hardware. config CAVIUM_OCTEON_REFERENCE_BOARD - bool "Support for the Cavium Networks Octeon reference board" + bool "Cavium Networks Octeon reference board" select CEVT_R4K select 64BIT_PHYS_ADDR select DMA_COHERENT diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 26947ab85260..c4cae9e6b802 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -473,12 +473,12 @@ endif # Simplified: what IP22 does at 128MB+ in ksegN, IP28 does at 512MB+ in xkphys # ifdef CONFIG_SGI_IP28 - ifeq ($(call cc-option-yn,-mr10k-cache-barrier=1), n) - $(error gcc doesn't support needed option -mr10k-cache-barrier=1) + ifeq ($(call cc-option-yn,-mr10k-cache-barrier=store), n) + $(error gcc doesn't support needed option -mr10k-cache-barrier=store) endif endif core-$(CONFIG_SGI_IP28) += arch/mips/sgi-ip22/ -cflags-$(CONFIG_SGI_IP28) += -mr10k-cache-barrier=1 -I$(srctree)/arch/mips/include/asm/mach-ip28 +cflags-$(CONFIG_SGI_IP28) += -mr10k-cache-barrier=store -I$(srctree)/arch/mips/include/asm/mach-ip28 load-$(CONFIG_SGI_IP28) += 0xa800000020004000 # diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 1c19af8daa62..d3a0c8154bec 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ @@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); write_unlock(&octeon_irq_ciu0_rwlock); + + return 0; } #endif @@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ @@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); write_unlock(&octeon_irq_ciu1_rwlock); + + return 0; } #endif diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index 744cd8fb107f..126044308dec 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -39,8 +39,8 @@ struct cache_desc { #define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */ struct cpuinfo_mips { - unsigned long udelay_val; - unsigned long asid_cache; + unsigned int udelay_val; + unsigned int asid_cache; /* * Capability and feature descriptor structure for MIPS CPU diff --git a/arch/mips/include/asm/delay.h b/arch/mips/include/asm/delay.h index b0bccd2c4ed5..a07e51b2be13 100644 --- a/arch/mips/include/asm/delay.h +++ b/arch/mips/include/asm/delay.h @@ -11,94 +11,12 @@ #ifndef _ASM_DELAY_H #define _ASM_DELAY_H -#include <linux/param.h> -#include <linux/smp.h> +extern void __delay(unsigned int loops); +extern void __ndelay(unsigned int ns); +extern void __udelay(unsigned int us); -#include <asm/compiler.h> -#include <asm/war.h> - -static inline void __delay(unsigned long loops) -{ - if (sizeof(long) == 4) - __asm__ __volatile__ ( - " .set noreorder \n" - " .align 3 \n" - "1: bnez %0, 1b \n" - " subu %0, 1 \n" - " .set reorder \n" - : "=r" (loops) - : "0" (loops)); - else if (sizeof(long) == 8 && !DADDI_WAR) - __asm__ __volatile__ ( - " .set noreorder \n" - " .align 3 \n" - "1: bnez %0, 1b \n" - " dsubu %0, 1 \n" - " .set reorder \n" - : "=r" (loops) - : "0" (loops)); - else if (sizeof(long) == 8 && DADDI_WAR) - __asm__ __volatile__ ( - " .set noreorder \n" - " .align 3 \n" - "1: bnez %0, 1b \n" - " dsubu %0, %2 \n" - " .set reorder \n" - : "=r" (loops) - : "0" (loops), "r" (1)); -} - - -/* - * Division by multiplication: you don't have to worry about - * loss of precision. - * - * Use only for very small delays ( < 1 msec). Should probably use a - * lookup table, really, as the multiplications take much too long with - * short delays. This is a "reasonable" implementation, though (and the - * first constant multiplications gets optimized away if the delay is - * a constant) - */ - -static inline void __udelay(unsigned long usecs, unsigned long lpj) -{ - unsigned long hi, lo; - - /* - * The rates of 128 is rounded wrongly by the catchall case - * for 64-bit. Excessive precission? Probably ... - */ -#if defined(CONFIG_64BIT) && (HZ == 128) - usecs *= 0x0008637bd05af6c7UL; /* 2**64 / (1000000 / HZ) */ -#elif defined(CONFIG_64BIT) - usecs *= (0x8000000000000000UL / (500000 / HZ)); -#else /* 32-bit junk follows here */ - usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) + - 0x80000000ULL) >> 32); -#endif - - if (sizeof(long) == 4) - __asm__("multu\t%2, %3" - : "=h" (usecs), "=l" (lo) - : "r" (usecs), "r" (lpj) - : GCC_REG_ACCUM); - else if (sizeof(long) == 8 && !R4000_WAR) - __asm__("dmultu\t%2, %3" - : "=h" (usecs), "=l" (lo) - : "r" (usecs), "r" (lpj) - : GCC_REG_ACCUM); - else if (sizeof(long) == 8 && R4000_WAR) - __asm__("dmultu\t%3, %4\n\tmfhi\t%0" - : "=r" (usecs), "=h" (hi), "=l" (lo) - : "r" (usecs), "r" (lpj) - : GCC_REG_ACCUM); - - __delay(usecs); -} - -#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val - -#define udelay(usecs) __udelay((usecs), __udelay_val) +#define ndelay(ns) __udelay(ns) +#define udelay(us) __udelay(us) /* make sure "usecs *= ..." in udelay do not overflow. */ #if HZ >= 1000 diff --git a/arch/mips/include/asm/ioctl.h b/arch/mips/include/asm/ioctl.h index 85067e248a83..916163401b2c 100644 --- a/arch/mips/include/asm/ioctl.h +++ b/arch/mips/include/asm/ioctl.h @@ -60,12 +60,16 @@ ((nr) << _IOC_NRSHIFT) | \ ((size) << _IOC_SIZESHIFT)) +#ifdef __KERNEL__ /* provoke compile error for invalid uses of size argument */ extern unsigned int __invalid_size_argument_for_IOC; #define _IOC_TYPECHECK(t) \ ((sizeof(t) == sizeof(t[1]) && \ sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ sizeof(t) : __invalid_size_argument_for_IOC) +#else +#define _IOC_TYPECHECK(t) (sizeof(t)) +#endif /* used to create numbers */ #define _IO(type, nr) _IOC(_IOC_NONE, (type), (nr), 0) diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 3214ade02d10..4f1eed107b08 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include <linux/cpumask.h> -extern void plat_set_irq_affinity(unsigned int irq, +extern int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity); extern void smtc_forward_irq(unsigned int irq); diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 8de858f5449f..c2d53c18fd36 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -956,7 +956,7 @@ __clear_user(void __user *addr, __kernel_size_t size) void __user * __cl_addr = (addr); \ unsigned long __cl_size = (n); \ if (__cl_size && access_ok(VERIFY_WRITE, \ - ((unsigned long)(__cl_addr)), __cl_size)) \ + __cl_addr, __cl_size)) \ __cl_size = __clear_user(__cl_addr, __cl_size); \ __cl_size; \ }) diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 87deb8f6c458..3f43c2e3aa5a 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) - return; + return -1; /* Assumption : cpumask refers to a single CPU */ spin_lock_irqsave(&gic_lock, flags); @@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); + return 0; } #endif diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index 26760cad8b69..e0a4ac18fa07 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, fmt, __cpu_name[n], (version >> 4) & 0x0f, version & 0x0f, (fp_vers >> 4) & 0x0f, fp_vers & 0x0f); - seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n", + seq_printf(m, "BogoMIPS\t\t: %u.%02u\n", cpu_data[n].udelay_val / (500000/HZ), (cpu_data[n].udelay_val / (5000/HZ)) % 100); seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no"); diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index c13c7ad2cdae..2adead5a8a37 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -2,8 +2,8 @@ # Makefile for MIPS-specific library files.. # -lib-y += csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \ - strncpy_user.o strnlen_user.o uncached.o +lib-y += csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \ + strlen_user.o strncpy_user.o strnlen_user.o uncached.o obj-y += iomap.o obj-$(CONFIG_PCI) += iomap-pci.o diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c new file mode 100644 index 000000000000..f69c6b569eb3 --- /dev/null +++ b/arch/mips/lib/delay.c @@ -0,0 +1,56 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 by Waldorf Electronics + * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki + */ +#include <linux/module.h> +#include <linux/param.h> +#include <linux/smp.h> + +#include <asm/compiler.h> +#include <asm/war.h> + +inline void __delay(unsigned int loops) +{ + __asm__ __volatile__ ( + " .set noreorder \n" + " .align 3 \n" + "1: bnez %0, 1b \n" + " subu %0, 1 \n" + " .set reorder \n" + : "=r" (loops) + : "0" (loops)); +} +EXPORT_SYMBOL(__delay); + +/* + * Division by multiplication: you don't have to worry about + * loss of precision. + * + * Use only for very small delays ( < 1 msec). Should probably use a + * lookup table, really, as the multiplications take much too long with + * short delays. This is a "reasonable" implementation, though (and the + * first constant multiplications gets optimized away if the delay is + * a constant) + */ + +void __udelay(unsigned long us) +{ + unsigned int lpj = current_cpu_data.udelay_val; + + __delay((us * 0x000010c7 * HZ * lpj) >> 32); +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long ns) +{ + unsigned int lpj = current_cpu_data.udelay_val; + + __delay((us * 0x00000005 * HZ * lpj) >> 32); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index f0cf46adb978..1c0048a6f5cf 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -82,8 +82,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) { - unsigned long flags; - int size; + unsigned long size, flags; #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", @@ -121,8 +120,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 9619f66e531e..892be426787c 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -117,8 +117,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) { - unsigned long flags; - int size; + unsigned long size, flags; ENTER_CRITICAL(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; @@ -160,8 +159,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; ENTER_CRITICAL(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c index 4f01a3be215c..4ec95cc2df2f 100644 --- a/arch/mips/mm/tlb-r8k.c +++ b/arch/mips/mm/tlb-r8k.c @@ -111,8 +111,7 @@ out_restore: /* Usable for KV1 addresses only! */ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 5ba31888fefb..499ffe5475df 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) +int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { cpumask_t tmask; int cpu = 0; @@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) /* Do any generic SMTC IRQ affinity setup */ smtc_set_irq_affinity(irq, tmask); + + return 0; } #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */ diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index 4ad5c3393fd3..45b6694c2079 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -148,7 +148,7 @@ static irqreturn_t panel_int(int irq, void *dev_id) if (sgint->istat1 & SGINT_ISTAT1_PWR) { /* Wait until interrupt goes away */ - disable_irq(SGI_PANEL_IRQ); + disable_irq_nosync(SGI_PANEL_IRQ); init_timer(&debounce_timer); debounce_timer.function = debounce; debounce_timer.expires = jiffies + 5; diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index b6cab089561e..9b95d80ebc6e 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -53,7 +53,7 @@ static inline void ip32_machine_halt(void) static void ip32_machine_power_off(void) { - volatile unsigned char reg_a, xctrl_a, xctrl_b; + unsigned char reg_a, xctrl_a, xctrl_b; disable_irq(MACEISA_RTC_IRQ); reg_a = CMOS_READ(RTC_REG_A); @@ -91,9 +91,10 @@ static void blink_timeout(unsigned long data) static void debounce(unsigned long data) { - volatile unsigned char reg_a, reg_c, xctrl_a; + unsigned char reg_a, reg_c, xctrl_a; reg_c = CMOS_READ(RTC_INTR_FLAGS); + reg_a = CMOS_READ(RTC_REG_A); CMOS_WRITE(reg_a | DS_REGA_DV0, RTC_REG_A); wbflush(); xctrl_a = CMOS_READ(DS_B1_XCTRL4A); @@ -137,7 +138,7 @@ static inline void ip32_power_button(void) static irqreturn_t ip32_rtc_int(int irq, void *dev_id) { - volatile unsigned char reg_c; + unsigned char reg_c; reg_c = CMOS_READ(RTC_INTR_FLAGS); if (!(reg_c & RTC_IRQF)) { @@ -145,7 +146,7 @@ static irqreturn_t ip32_rtc_int(int irq, void *dev_id) "%s: RTC IRQ without RTC_IRQF\n", __func__); } /* Wait until interrupt goes away */ - disable_irq(MACEISA_RTC_IRQ); + disable_irq_nosync(MACEISA_RTC_IRQ); init_timer(&debounce_timer); debounce_timer.function = debounce; debounce_timer.expires = jiffies + 50; diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index c147c4b35d3f..690de06bde90 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -118,7 +118,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } i = cpumask_first(mask); @@ -152,6 +152,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) } } spin_unlock_irqrestore(&bcm1480_imr_lock, flags); + + return 0; } #endif diff --git a/arch/mips/sibyte/cfe/setup.c b/arch/mips/sibyte/cfe/setup.c index 3de30f79db3f..eb5396cf81bb 100644 --- a/arch/mips/sibyte/cfe/setup.c +++ b/arch/mips/sibyte/cfe/setup.c @@ -288,13 +288,7 @@ void __init prom_init(void) */ cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE); if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) { - if (argc < 0) { - /* - * It's OK for direct boot to not provide a - * command line - */ - strcpy(arcs_cmdline, "root=/dev/ram0 "); - } else { + if (argc >= 0) { /* The loader should have set the command line */ /* too early for panic to do any good */ printk("LINUX_CMDLINE not defined in cfe."); diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index 38cb998ade22..409dec798863 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; @@ -113,7 +113,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } /* Convert logical CPU to physical CPU */ @@ -143,6 +143,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) R_IMR_INTERRUPT_MASK)); } spin_unlock_irqrestore(&sb1250_imr_lock, flags); + + return 0; } #endif diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 4ea4229d765c..8007f1e65729 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest) return cpu_dest; } -static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) +static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { int cpu_dest; cpu_dest = cpu_check_affinity(irq, dest); if (cpu_dest < 0) - return; + return -1; cpumask_copy(&irq_desc[irq].affinity, dest); + + return 0; } #endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0d1146a0578..cdc9a6ff4be8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,6 +868,18 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && 8xx diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 5339bb44cce9..ea8870a34482 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28-rc3 -# Tue Nov 11 19:36:51 2008 +# Linux kernel version: 2.6.30-rc7 +# Mon May 25 14:53:25 2009 # # CONFIG_PPC64 is not set @@ -14,6 +14,7 @@ CONFIG_6xx=y # CONFIG_40x is not set # CONFIG_44x is not set # CONFIG_E200 is not set +CONFIG_PPC_BOOK3S=y CONFIG_PPC_FPU=y CONFIG_ALTIVEC=y CONFIG_PPC_STD_MMU=y @@ -43,7 +44,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y CONFIG_OF=y @@ -52,12 +53,14 @@ CONFIG_OF=y CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_DTC=y # CONFIG_DEFAULT_UIMAGE is not set CONFIG_HIBERNATE_32=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -72,14 +75,24 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -88,23 +101,27 @@ CONFIG_NAMESPACES=y # CONFIG_IPC_NS is not set # CONFIG_USER_NS is not set # CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y -# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y @@ -114,10 +131,12 @@ CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y +# CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y # CONFIG_MARKERS is not set CONFIG_OPROFILE=y CONFIG_HAVE_OPROFILE=y @@ -127,10 +146,10 @@ CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -138,11 +157,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_BLOCK=y CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set -CONFIG_LSF=y CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set @@ -158,14 +174,11 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" -CONFIG_CLASSIC_RCU=y CONFIG_FREEZER=y # # Platform support # -CONFIG_PPC_MULTIPLATFORM=y -CONFIG_CLASSIC32=y # CONFIG_PPC_CHRP is not set # CONFIG_MPC5121_ADS is not set # CONFIG_MPC5121_GENERIC is not set @@ -178,7 +191,9 @@ CONFIG_PPC_PMAC=y # CONFIG_PPC_83xx is not set # CONFIG_PPC_86xx is not set # CONFIG_EMBEDDED6xx is not set +# CONFIG_AMIGAONE is not set CONFIG_PPC_NATIVE=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y # CONFIG_IPIC is not set CONFIG_MPIC=y # CONFIG_MPIC_WEIRD is not set @@ -212,11 +227,12 @@ CONFIG_CPU_FREQ_PMAC=y CONFIG_PPC601_SYNC_FIX=y # CONFIG_TAU is not set # CONFIG_FSL_ULI1575 is not set +# CONFIG_SIMPLE_GPIO is not set # # Kernel options # -# CONFIG_HIGHMEM is not set +CONFIG_HIGHMEM=y CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -239,6 +255,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_HAS_WALK_MEMORY=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y # CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y @@ -250,12 +267,17 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_MIGRATION is not set -# CONFIG_RESOURCES_64BIT is not set # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_PPC_4K_PAGES=y +# CONFIG_PPC_16K_PAGES is not set +# CONFIG_PPC_64K_PAGES is not set +# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set @@ -288,6 +310,8 @@ CONFIG_ARCH_SUPPORTS_MSI=y # CONFIG_PCI_MSI is not set # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set CONFIG_PCCARD=m # CONFIG_PCMCIA_DEBUG is not set CONFIG_PCMCIA=m @@ -397,6 +421,8 @@ CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_HL=m +# CONFIG_NETFILTER_XT_TARGET_LED is not set CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m @@ -405,6 +431,7 @@ CONFIG_NETFILTER_XT_TARGET_RATEEST=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set CONFIG_NETFILTER_XT_MATCH_COMMENT=m # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m @@ -415,6 +442,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_HL=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -478,17 +506,15 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_IP_DCCP=m CONFIG_INET_DCCP_DIAG=m -CONFIG_IP_DCCP_ACKVEC=y # # DCCP CCIDs Configuration (EXPERIMENTAL) # -CONFIG_IP_DCCP_CCID2=m # CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_CCID3=y # CONFIG_IP_DCCP_CCID3_DEBUG is not set CONFIG_IP_DCCP_CCID3_RTO=100 -CONFIG_IP_DCCP_TFRC_LIB=m +CONFIG_IP_DCCP_TFRC_LIB=y # # DCCP Kernel Hacking @@ -508,13 +534,16 @@ CONFIG_IP_DCCP_TFRC_LIB=m # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y +# CONFIG_DCB is not set # # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set CONFIG_IRDA=m @@ -577,8 +606,6 @@ CONFIG_BT_HIDP=m # # Bluetooth device drivers # -CONFIG_BT_HCIUSB=m -# CONFIG_BT_HCIUSB_SCO is not set # CONFIG_BT_HCIBTUSB is not set # CONFIG_BT_HCIUART is not set CONFIG_BT_HCIBCM203X=m @@ -590,31 +617,27 @@ CONFIG_BT_HCIBFUSB=m # CONFIG_BT_HCIBTUART is not set # CONFIG_BT_HCIVHCI is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_WIRELESS=y CONFIG_CFG80211=m -CONFIG_NL80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=m # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -# CONFIG_MAC80211_RC_MINSTREL is not set -CONFIG_MAC80211_RC_DEFAULT_PID=y -# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_IEEE80211=m -# CONFIG_IEEE80211_DEBUG is not set -CONFIG_IEEE80211_CRYPT_WEP=m -CONFIG_IEEE80211_CRYPT_CCMP=m -CONFIG_IEEE80211_CRYPT_TKIP=m +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -662,17 +685,27 @@ CONFIG_BLK_DEV_RAM_SIZE=4096 # CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_ISL29003 is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y CONFIG_IDE=y # # Please see Documentation/ide/ide.txt for help/info on IDE drives # +CONFIG_IDE_XFER_MODE=y CONFIG_IDE_TIMINGS=y CONFIG_IDE_ATAPI=y # CONFIG_BLK_DEV_IDE_SATA is not set @@ -684,7 +717,6 @@ CONFIG_BLK_DEV_IDECS=m CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y # CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDESCSI=y # CONFIG_IDE_TASK_IOCTL is not set CONFIG_IDE_PROC_FS=y @@ -714,6 +746,7 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y # CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT8172 is not set # CONFIG_BLK_DEV_IT8213 is not set # CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set @@ -728,7 +761,6 @@ CONFIG_BLK_DEV_SL82C105=y # CONFIG_BLK_DEV_TC86C001 is not set CONFIG_BLK_DEV_IDE_PMAC=y CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y -CONFIG_BLK_DEV_IDEDMA_PMAC=y CONFIG_BLK_DEV_IDEDMA=y # @@ -772,6 +804,7 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -791,8 +824,12 @@ CONFIG_SCSI_AIC7XXX_OLD=m # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -822,6 +859,7 @@ CONFIG_SCSI_MAC53C94=y # CONFIG_SCSI_SRP is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set # CONFIG_ATA is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=m @@ -881,6 +919,7 @@ CONFIG_THERM_ADT746X=m # CONFIG_ANSLCD is not set CONFIG_PMAC_RACKMETER=m CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -898,6 +937,8 @@ CONFIG_BMAC=y CONFIG_SUNGEM=y # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set @@ -913,7 +954,6 @@ CONFIG_PCNET32=y # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set # CONFIG_FORCEDETH is not set -# CONFIG_EEPRO100 is not set # CONFIG_E100 is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -923,6 +963,7 @@ CONFIG_PCNET32=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set @@ -935,6 +976,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -945,18 +987,20 @@ CONFIG_NETDEV_1000=y # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set -# CONFIG_MV643XX_ETH is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set # CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -966,6 +1010,7 @@ CONFIG_NETDEV_10000=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set +# CONFIG_BE2NET is not set # CONFIG_TR is not set # @@ -974,20 +1019,11 @@ CONFIG_NETDEV_10000=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set -CONFIG_HERMES=m -CONFIG_APPLE_AIRPORT=m -# CONFIG_PLX_HERMES is not set -# CONFIG_TMD_HERMES is not set -# CONFIG_NORTEL_HERMES is not set -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -# CONFIG_PCMCIA_SPECTRUM is not set # CONFIG_ATMEL is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_AIRO_CS is not set # CONFIG_PCMCIA_WL3501 is not set CONFIG_PRISM54=m @@ -997,15 +1033,17 @@ CONFIG_PRISM54=m # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set # CONFIG_MAC80211_HWSIM is not set +# CONFIG_MWL8K is not set CONFIG_P54_COMMON=m # CONFIG_P54_USB is not set # CONFIG_P54_PCI is not set +CONFIG_P54_LEDS=y # CONFIG_ATH5K is not set # CONFIG_ATH9K is not set -# CONFIG_IWLCORE is not set -# CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWLAGN is not set -# CONFIG_IWL3945 is not set +# CONFIG_AR9170_USB is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWLWIFI is not set # CONFIG_HOSTAP is not set CONFIG_B43=m CONFIG_B43_PCI_AUTOSELECT=y @@ -1025,6 +1063,19 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y # CONFIG_B43LEGACY_PIO_MODE is not set # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +CONFIG_HERMES=m +CONFIG_HERMES_CACHE_FW_ON_INIT=y +CONFIG_APPLE_AIRPORT=m +# CONFIG_PLX_HERMES is not set +# CONFIG_TMD_HERMES is not set +# CONFIG_NORTEL_HERMES is not set +CONFIG_PCI_HERMES=m +CONFIG_PCMCIA_HERMES=m +# CONFIG_PCMCIA_SPECTRUM is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # # USB Network Adapters @@ -1036,6 +1087,7 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m CONFIG_USB_NET_CDCETHER=m +# CONFIG_USB_NET_CDC_EEM is not set # CONFIG_USB_NET_DM9601 is not set # CONFIG_USB_NET_SMSC95XX is not set # CONFIG_USB_NET_GL620A is not set @@ -1099,7 +1151,7 @@ CONFIG_INPUT_KEYBOARD=y CONFIG_INPUT_MOUSE=y # CONFIG_MOUSE_PS2 is not set # CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_APPLETOUCH is not set +CONFIG_MOUSE_APPLETOUCH=y # CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set @@ -1150,10 +1202,13 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y # CONFIG_SERIAL_JSM is not set # CONFIG_SERIAL_OF_PLATFORM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_HVC_UDBG is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=m +# CONFIG_HW_RANDOM_TIMERIOMEM is not set CONFIG_NVRAM=y CONFIG_GEN_RTC=y # CONFIG_GEN_RTC_X is not set @@ -1232,12 +1287,9 @@ CONFIG_I2C_POWERMAC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set @@ -1259,11 +1311,11 @@ CONFIG_BATTERY_PMU=y # CONFIG_THERMAL is not set # CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y CONFIG_SSB=m CONFIG_SSB_SPROM=y CONFIG_SSB_PCIHOST_POSSIBLE=y @@ -1281,18 +1333,13 @@ CONFIG_SSB_DRIVER_PCICORE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set - -# -# Voltage and Current regulators -# +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set -# CONFIG_REGULATOR_FIXED_VOLTAGE is not set -# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set -# CONFIG_REGULATOR_BQ24022 is not set # # Multimedia devices @@ -1390,6 +1437,7 @@ CONFIG_FB_ATY_BACKLIGHT=y # CONFIG_FB_KYRO is not set CONFIG_FB_3DFX=y # CONFIG_FB_3DFX_ACCEL is not set +CONFIG_FB_3DFX_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set # CONFIG_FB_TRIDENT is not set @@ -1399,12 +1447,14 @@ CONFIG_FB_3DFX=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=m # CONFIG_LCD_ILI9320 is not set # CONFIG_LCD_PLATFORM is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # # Display device support @@ -1444,11 +1494,13 @@ CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +# CONFIG_SND_HRTIMER is not set # CONFIG_SND_DYNAMIC_MINORS is not set CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y CONFIG_SND_DRIVERS=y CONFIG_SND_DUMMY=m # CONFIG_SND_VIRMIDI is not set @@ -1486,6 +1538,8 @@ CONFIG_SND_PCI=y # CONFIG_SND_INDIGO is not set # CONFIG_SND_INDIGOIO is not set # CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set # CONFIG_SND_EMU10K1 is not set # CONFIG_SND_EMU10K1X is not set # CONFIG_SND_ENS1370 is not set @@ -1551,28 +1605,31 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y -CONFIG_HID_BRIGHT=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y -CONFIG_HID_DELL=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +CONFIG_HID_KYE=y CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y # CONFIG_PANTHERLORD_FF is not set CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y # CONFIG_THRUSTMASTER_FF is not set # CONFIG_ZEROPLUS_FF is not set CONFIG_USB_SUPPORT=y @@ -1603,6 +1660,7 @@ CONFIG_USB_EHCI_HCD=m CONFIG_USB_EHCI_ROOT_HUB_TT=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_EHCI_HCD_PPC_OF is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1625,24 +1683,23 @@ CONFIG_USB_PRINTER=m # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# may also be needed; see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set -CONFIG_USB_STORAGE_ONETOUCH=y +CONFIG_USB_STORAGE_ONETOUCH=m # CONFIG_USB_STORAGE_KARMA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set # CONFIG_USB_LIBUSUAL is not set @@ -1665,7 +1722,7 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_SERIAL_CH341 is not set # CONFIG_USB_SERIAL_WHITEHEAT is not set # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set -# CONFIG_USB_SERIAL_CP2101 is not set +# CONFIG_USB_SERIAL_CP210X is not set # CONFIG_USB_SERIAL_CYPRESS_M8 is not set # CONFIG_USB_SERIAL_EMPEG is not set # CONFIG_USB_SERIAL_FTDI_SIO is not set @@ -1701,15 +1758,19 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_SERIAL_NAVMAN is not set # CONFIG_USB_SERIAL_PL2303 is not set # CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set # CONFIG_USB_SERIAL_SPCP8X5 is not set # CONFIG_USB_SERIAL_HP4X is not set # CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set # CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set # CONFIG_USB_SERIAL_TI is not set # CONFIG_USB_SERIAL_CYBERJACK is not set # CONFIG_USB_SERIAL_XIRCOM is not set # CONFIG_USB_SERIAL_OPTION is not set # CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set # CONFIG_USB_SERIAL_DEBUG is not set # @@ -1726,7 +1787,6 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set CONFIG_USB_APPLEDISPLAY=m @@ -1738,6 +1798,11 @@ CONFIG_USB_APPLEDISPLAY=m # CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1748,7 +1813,9 @@ CONFIG_LEDS_CLASS=y # LED drivers # # CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_LP5521 is not set # CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_BD2802 is not set # # LED Triggers @@ -1759,11 +1826,16 @@ CONFIG_LEDS_TRIGGER_IDE_DISK=y # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set CONFIG_LEDS_TRIGGER_DEFAULT_ON=y + +# +# iptables trigger is under Netfilter config (LED target) +# # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set # CONFIG_EDAC is not set # CONFIG_RTC_CLASS is not set # CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set @@ -1774,6 +1846,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y # CONFIG_EXT3_FS_SECURITY is not set @@ -1783,7 +1856,9 @@ CONFIG_EXT4_FS_XATTR=y # CONFIG_EXT4_FS_POSIX_ACL is not set # CONFIG_EXT4_FS_SECURITY is not set CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -1792,6 +1867,7 @@ CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y @@ -1801,6 +1877,11 @@ CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y @@ -1831,10 +1912,7 @@ CONFIG_TMPFS=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set CONFIG_HFS_FS=m @@ -1843,6 +1921,7 @@ CONFIG_HFSPLUS_FS=m # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1851,6 +1930,7 @@ CONFIG_HFSPLUS_FS=m # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1868,7 +1948,6 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m @@ -1940,11 +2019,13 @@ CONFIG_NLS_ISO8859_1=m # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=m # CONFIG_DLM is not set +CONFIG_BINARY_PRINTF=y # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -1954,15 +2035,18 @@ CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m -CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y # # Kernel hacking @@ -1973,13 +2057,16 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set +CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y CONFIG_SCHEDSTATS=y # CONFIG_TIMER_STATS is not set @@ -1994,6 +2081,7 @@ CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set @@ -2001,6 +2089,7 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set @@ -2009,7 +2098,14 @@ CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_FAULT_INJECTION is not set CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -2017,12 +2113,19 @@ CONFIG_HAVE_FUNCTION_TRACER=y # CONFIG_FUNCTION_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set +CONFIG_PRINT_STACK_DEPTH=64 # CONFIG_DEBUG_STACKOVERFLOW is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_CODE_PATCHING_SELFTEST is not set @@ -2033,6 +2136,7 @@ CONFIG_XMON_DEFAULT=y CONFIG_XMON_DISASSEMBLY=y CONFIG_DEBUGGER=y CONFIG_IRQSTACKS=y +# CONFIG_VIRQ_DEBUG is not set # CONFIG_BDI_SWITCH is not set CONFIG_BOOTX_TEXT=y # CONFIG_PPC_EARLY_DEBUG is not set @@ -2051,13 +2155,20 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_RNG=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y # CONFIG_CRYPTO_TEST is not set @@ -2127,6 +2238,7 @@ CONFIG_CRYPTO_TWOFISH_COMMON=m # Compression # CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set # diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index ac14f5245d2a..e28e65e7a0e1 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -1,13 +1,14 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc8 -# Fri Mar 13 09:28:45 2009 +# Linux kernel version: 2.6.30-rc5 +# Fri May 15 10:37:00 2009 # CONFIG_PPC64=y # # Processor support # +CONFIG_PPC_BOOK3S=y # CONFIG_POWER4_ONLY is not set CONFIG_POWER3=y CONFIG_POWER4=y @@ -55,9 +56,11 @@ CONFIG_OF=y # CONFIG_GENERIC_TBSYNC is not set CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y +CONFIG_DTC=y # CONFIG_DEFAULT_UIMAGE is not set # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -72,6 +75,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -88,8 +92,7 @@ CONFIG_CLASSIC_RCU=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_GROUP_SCHED is not set # CONFIG_CGROUPS is not set -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_SYSFS_DEPRECATED_V2 is not set # CONFIG_RELAY is not set CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set @@ -99,6 +102,9 @@ CONFIG_NAMESPACES=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -107,6 +113,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -138,6 +145,7 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_ATTRS=y CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -150,7 +158,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y @@ -172,7 +179,6 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -CONFIG_PPC_MULTIPLATFORM=y # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_ISERIES is not set # CONFIG_PPC_PMAC is not set @@ -209,6 +215,7 @@ CONFIG_SPU_FS_64K_LS=y # CONFIG_SPU_TRACE is not set CONFIG_SPU_BASE=y # CONFIG_PQ2ADS is not set +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set # CONFIG_IPIC is not set # CONFIG_MPIC is not set # CONFIG_MPIC_WEIRD is not set @@ -279,11 +286,14 @@ CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_PPC_HAS_HASH_64K=y CONFIG_PPC_4K_PAGES=y # CONFIG_PPC_16K_PAGES is not set # CONFIG_PPC_64K_PAGES is not set +# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_SCHED_SMT=y CONFIG_PROC_DEVICETREE=y @@ -316,7 +326,6 @@ CONFIG_NET=y # # Networking options # -CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -389,6 +398,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set # CONFIG_NET_SCHED is not set # CONFIG_DCB is not set @@ -396,6 +406,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_IRDA is not set @@ -419,11 +430,9 @@ CONFIG_BT_HCIBTUSB=m # CONFIG_BT_HCIBFUSB is not set # CONFIG_BT_HCIVHCI is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_WIRELESS=y CONFIG_CFG80211=m # CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_NL80211=y # CONFIG_WIRELESS_OLD_REGULATORY is not set CONFIG_WIRELESS_EXT=y # CONFIG_WIRELESS_EXT_SYSFS is not set @@ -602,6 +611,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set # CONFIG_ATA is not set CONFIG_MD=y # CONFIG_BLK_DEV_MD is not set @@ -616,6 +626,7 @@ CONFIG_BLK_DEV_DM=m # CONFIG_DM_UEVENT is not set # CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -625,6 +636,8 @@ CONFIG_NETDEVICES=y # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y CONFIG_MII=m +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set @@ -646,12 +659,13 @@ CONFIG_GELIC_WIRELESS_OLD_PSK_INTERFACE=y CONFIG_WLAN_80211=y # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_USB_ZD1201 is not set # CONFIG_USB_NET_RNDIS_WLAN is not set # CONFIG_RTL8187 is not set # CONFIG_MAC80211_HWSIM is not set # CONFIG_P54_COMMON is not set -# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_AR9170_USB is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set # CONFIG_B43LEGACY is not set @@ -673,6 +687,7 @@ CONFIG_USB_PEGASUS=m CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m # CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_EEM is not set # CONFIG_USB_NET_DM9601 is not set # CONFIG_USB_NET_SMSC95XX is not set # CONFIG_USB_NET_GL620A is not set @@ -724,28 +739,7 @@ CONFIG_INPUT_EVDEV=m # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_JOYSTICK=y -# CONFIG_JOYSTICK_ANALOG is not set -# CONFIG_JOYSTICK_A3D is not set -# CONFIG_JOYSTICK_ADI is not set -# CONFIG_JOYSTICK_COBRA is not set -# CONFIG_JOYSTICK_GF2K is not set -# CONFIG_JOYSTICK_GRIP is not set -# CONFIG_JOYSTICK_GRIP_MP is not set -# CONFIG_JOYSTICK_GUILLEMOT is not set -# CONFIG_JOYSTICK_INTERACT is not set -# CONFIG_JOYSTICK_SIDEWINDER is not set -# CONFIG_JOYSTICK_TMDC is not set -# CONFIG_JOYSTICK_IFORCE is not set -# CONFIG_JOYSTICK_WARRIOR is not set -# CONFIG_JOYSTICK_MAGELLAN is not set -# CONFIG_JOYSTICK_SPACEORB is not set -# CONFIG_JOYSTICK_SPACEBALL is not set -# CONFIG_JOYSTICK_STINGER is not set -# CONFIG_JOYSTICK_TWIDJOY is not set -# CONFIG_JOYSTICK_ZHENHUA is not set -# CONFIG_JOYSTICK_JOYDUMP is not set -# CONFIG_JOYSTICK_XPAD is not set +# CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -864,6 +858,7 @@ CONFIG_FB_PS3_DEFAULT_SIZE_M=9 # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # @@ -934,15 +929,17 @@ CONFIG_USB_HIDDEV=y # # Special HID drivers # -# CONFIG_HID_COMPAT is not set # CONFIG_HID_A4TECH is not set # CONFIG_HID_APPLE is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_CHICONY is not set # CONFIG_HID_CYPRESS is not set +# CONFIG_DRAGONRISE_FF is not set # CONFIG_HID_EZKEY is not set +# CONFIG_HID_KYE is not set # CONFIG_HID_GYRATION is not set +# CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LOGITECH is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set @@ -950,7 +947,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_SAMSUNG is not set -# CONFIG_HID_SONY is not set +CONFIG_HID_SONY=m # CONFIG_HID_SUNPLUS is not set # CONFIG_GREENASIA_FF is not set # CONFIG_HID_TOPSEED is not set @@ -1012,11 +1009,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set @@ -1058,7 +1055,6 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set @@ -1074,6 +1070,7 @@ CONFIG_USB_STORAGE=m # # OTG and related infrastructure # +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -1113,8 +1110,10 @@ CONFIG_RTC_INTF_DEV=y # # on-CPU RTC drivers # -CONFIG_RTC_DRV_PPC=m +# CONFIG_RTC_DRV_GENERIC is not set +CONFIG_RTC_DRV_PS3=m # CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set @@ -1125,6 +1124,7 @@ CONFIG_EXT2_FS=m # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=m +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1161,6 +1161,11 @@ CONFIG_AUTOFS4_FS=m # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=m @@ -1211,6 +1216,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1223,7 +1229,6 @@ CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -1283,6 +1288,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set # CONFIG_DLM is not set +CONFIG_BINARY_PRINTF=y # # Library routines @@ -1296,15 +1302,16 @@ CONFIG_CRC_ITU_T=m CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m CONFIG_LZO_COMPRESS=m CONFIG_LZO_DECOMPRESS=m -CONFIG_PLIST=y +CONFIG_DECOMPRESS_GZIP=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y # # Kernel hacking @@ -1322,6 +1329,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1357,12 +1367,15 @@ CONFIG_DEBUG_LIST=y # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_DEBUG_PAGEALLOC is not set CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1371,18 +1384,21 @@ CONFIG_TRACING=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_PRINT_STACK_DEPTH=64 CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set @@ -1415,10 +1431,12 @@ CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m # CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # CONFIG_CRYPTO_TEST is not set @@ -1487,6 +1505,7 @@ CONFIG_CRYPTO_SALSA20=m # Compression # # CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set CONFIG_CRYPTO_LZO=m # diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index c69f2b5f0cc4..cb448d68452c 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,9 @@ * allocate the space "normally" and use the cache management functions * to ensure it is consistent. */ -extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp); +struct device; +extern void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); extern void __dma_free_coherent(size_t size, void *vaddr); extern void __dma_sync(void *vaddr, size_t size, int direction); extern void __dma_sync_page(struct page *page, unsigned long offset, @@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset, * Cache coherent cores. */ -#define __dma_alloc_coherent(gfp, size, handle) NULL +#define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) #define __dma_sync(addr, size, rw) ((void)0) #define __dma_sync_page(pg, off, sz, rw) ((void)0) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index d60fd18f428c..f1f4e23a84e9 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -14,8 +14,6 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -extern unsigned long FIXADDR_TOP; - #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/page.h> @@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP; #include <asm/kmap_types.h> #endif +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index ba45c997830f..c9ff9d75990e 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -10,7 +10,7 @@ extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); -extern unsigned long ioremap_bot, ioremap_base; +extern unsigned long ioremap_bot; #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -56,8 +56,30 @@ extern int icache_44x_need_flush; printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* + * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary + * value (for now) on others, from where we can start layout kernel + * virtual space that goes below PKMAP and FIXMAP + */ +#ifdef CONFIG_HIGHMEM +#define KVIRT_TOP PKMAP_BASE +#else +#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#endif + +/* + * ioremap_bot starts at that address. Early ioremaps move down from there, + * until mem_init() at which point this becomes the top of the vmalloc + * and ioremap space + */ +#ifdef CONFIG_NOT_COHERENT_CACHE +#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#else +#define IOREMAP_TOP KVIRT_TOP +#endif + +/* * Just any arbitrary offset to the start of the vmalloc VM area: the - * current 64MB value just means that there will be a 64MB "hole" after the + * current 16MB value just means that there will be a 64MB "hole" after the * physical memory until the kernel virtual memory starts. That means that * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 53c7788cba78..6b02793dc75b 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE - ret = __dma_alloc_coherent(size, dma_handle, flag); + ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; *dma_handle += get_dma_direct_offset(dev); diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 70e2a736be1f..2d182f119d1d 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -157,7 +157,7 @@ __ftrace_make_nop(struct module *mod, * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) */ - pr_debug("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); + pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { @@ -165,7 +165,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - pr_debug(" %08x %08x", jmp[0], jmp[1]); + pr_devel(" %08x %08x", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ if (((jmp[0] & 0xffff0000) != 0x3d820000) || @@ -181,18 +181,18 @@ __ftrace_make_nop(struct module *mod, offset = ((unsigned)((unsigned short)jmp[0]) << 16) + (int)((short)jmp[1]); - pr_debug(" %x ", offset); + pr_devel(" %x ", offset); /* get the address this jumps too */ tramp = mod->arch.toc + offset + 32; - pr_debug("toc: %lx", tramp); + pr_devel("toc: %lx", tramp); if (probe_kernel_read(jmp, (void *)tramp, 8)) { printk(KERN_ERR "Failed to read %lx\n", tramp); return -EFAULT; } - pr_debug(" %08x %08x\n", jmp[0], jmp[1]); + pr_devel(" %08x %08x\n", jmp[0], jmp[1]); ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; @@ -269,7 +269,7 @@ __ftrace_make_nop(struct module *mod, * 0x4e, 0x80, 0x04, 0x20 bctr */ - pr_debug("ip:%lx jumps to %lx", ip, tramp); + pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { @@ -277,7 +277,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - pr_debug(" %08x %08x ", jmp[0], jmp[1]); + pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ if (((jmp[0] & 0xffff0000) != 0x3d600000) || @@ -293,7 +293,7 @@ __ftrace_make_nop(struct module *mod, if (tramp & 0x8000) tramp -= 0x10000; - pr_debug(" %lx ", tramp); + pr_devel(" %lx ", tramp); if (tramp != addr) { printk(KERN_ERR @@ -402,7 +402,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* ld r2,40(r1) */ op[1] = 0xe8410028; - pr_debug("write to %lx\n", rec->ip); + pr_devel("write to %lx\n", rec->ip); if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) return -EPERM; @@ -442,7 +442,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - pr_debug("write to %lx\n", rec->ip); + pr_devel("write to %lx\n", rec->ip); if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) return -EPERM; @@ -594,7 +594,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) PPC_LONG "2b,4b\n" ".previous" - : [old] "=r" (old), [faulted] "=r" (faulted) + : [old] "=&r" (old), [faulted] "=r" (faulted) : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a047a6cfca4d..8ef8a14abc95 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -264,6 +264,7 @@ SECTIONS *(.data.page_aligned) } + . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8db35278a4b4..29b742b90f1f 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o obj-$(CONFIG_XMON) += sstep.o obj-$(CONFIG_KPROBES) += sstep.o -obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c deleted file mode 100644 index 005a28d380af..000000000000 --- a/arch/powerpc/lib/dma-noncoherent.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * PowerPC version derived from arch/arm/mm/consistent.c - * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) - * - * Copyright (C) 2000 Russell King - * - * Consistent memory allocators. Used for DMA devices that want to - * share uncached memory with the processor core. The function return - * is the virtual address and 'dma_handle' is the physical address. - * Mostly stolen from the ARM port, with some changes for PowerPC. - * -- Dan - * - * Reorganized to get rid of the arch-specific consistent_* functions - * and provide non-coherent implementations for the DMA API. -Matt - * - * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() - * implementation. This is pulled straight from ARM and barely - * modified. -Matt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/highmem.h> -#include <linux/dma-mapping.h> -#include <linux/vmalloc.h> - -#include <asm/tlbflush.h> - -/* - * Allocate DMA-coherent memory space and return both the kernel remapped - * virtual and bus address for that space. - */ -void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) -{ - struct page *page; - unsigned long order; - int i; - unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT; - unsigned int array_size = nr_pages * sizeof(struct page *); - struct page **pages; - struct page *end; - u64 mask = 0x00ffffff, limit; /* ISA default */ - struct vm_struct *area; - - BUG_ON(!mem_init_done); - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - printk(KERN_WARNING "coherent allocation too big (requested " - "%#x mask %#Lx)\n", size, mask); - return NULL; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - end = page + (1 << order); - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - flush_dcache_range(kaddr, kaddr + size); - } - - split_page(page, order); - - /* - * Set the "dma handle" - */ - *handle = page_to_phys(page); - - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(1)); - if (!area) - goto out_free_pages; - - if (array_size > PAGE_SIZE) { - pages = vmalloc(array_size); - area->flags |= VM_VPAGES; - } else { - pages = kmalloc(array_size, GFP_KERNEL); - } - if (!pages) - goto out_free_area; - - area->pages = pages; - area->nr_pages = nr_pages; - - for (i = 0; i < nr_pages; i++) - pages[i] = page + i; - - if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages)) - goto out_unmap; - - /* - * Free the otherwise unused pages. - */ - page += nr_pages; - while (page < end) { - __free_page(page); - page++; - } - - return area->addr; -out_unmap: - vunmap(area->addr); - if (array_size > PAGE_SIZE) - vfree(pages); - else - kfree(pages); - goto out_free_pages; -out_free_area: - free_vm_area(area); -out_free_pages: - if (page) - __free_pages(page, order); -no_page: - return NULL; -} -EXPORT_SYMBOL(__dma_alloc_coherent); - -/* - * free a page as defined by the above mapping. - */ -void __dma_free_coherent(size_t size, void *vaddr) -{ - vfree(vaddr); - -} -EXPORT_SYMBOL(__dma_free_coherent); - -/* - * make an area consistent. - */ -void __dma_sync(void *vaddr, size_t size, int direction) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - switch (direction) { - case DMA_NONE: - BUG(); - case DMA_FROM_DEVICE: - /* - * invalidate only when cache-line aligned otherwise there is - * the potential for discarding uncommitted data from the cache - */ - if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) - flush_dcache_range(start, end); - else - invalidate_dcache_range(start, end); - break; - case DMA_TO_DEVICE: /* writeback only */ - clean_dcache_range(start, end); - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - flush_dcache_range(start, end); - break; - } -} -EXPORT_SYMBOL(__dma_sync); - -#ifdef CONFIG_HIGHMEM -/* - * __dma_sync_page() implementation for systems using highmem. - * In this case, each page of a buffer must be kmapped/kunmapped - * in order to have a virtual address for __dma_sync(). This must - * not sleep so kmap_atomic()/kunmap_atomic() are used. - * - * Note: yes, it is possible and correct to have a buffer extend - * beyond the first page. - */ -static inline void __dma_sync_page_highmem(struct page *page, - unsigned long offset, size_t size, int direction) -{ - size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); - size_t cur_size = seg_size; - unsigned long flags, start, seg_offset = offset; - int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; - int seg_nr = 0; - - local_irq_save(flags); - - do { - start = (unsigned long)kmap_atomic(page + seg_nr, - KM_PPC_SYNC_PAGE) + seg_offset; - - /* Sync this buffer segment */ - __dma_sync((void *)start, seg_size, direction); - kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); - seg_nr++; - - /* Calculate next buffer segment size */ - seg_size = min((size_t)PAGE_SIZE, size - cur_size); - - /* Add the segment size to our running total */ - cur_size += seg_size; - seg_offset = 0; - } while (seg_nr < nr_segs); - - local_irq_restore(flags); -} -#endif /* CONFIG_HIGHMEM */ - -/* - * __dma_sync_page makes memory consistent. identical to __dma_sync, but - * takes a struct page instead of a virtual address - */ -void __dma_sync_page(struct page *page, unsigned long offset, - size_t size, int direction) -{ -#ifdef CONFIG_HIGHMEM - __dma_sync_page_highmem(page, offset, size, direction); -#else - unsigned long start = (unsigned long)page_address(page) + offset; - __dma_sync((void *)start, size, direction); -#endif -} -EXPORT_SYMBOL(__dma_sync_page); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 17290bcedc5e..b746f4ca4209 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c new file mode 100644 index 000000000000..36692f5c9a76 --- /dev/null +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -0,0 +1,400 @@ +/* + * PowerPC version derived from arch/arm/mm/consistent.c + * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) + * + * Copyright (C) 2000 Russell King + * + * Consistent memory allocators. Used for DMA devices that want to + * share uncached memory with the processor core. The function return + * is the virtual address and 'dma_handle' is the physical address. + * Mostly stolen from the ARM port, with some changes for PowerPC. + * -- Dan + * + * Reorganized to get rid of the arch-specific consistent_* functions + * and provide non-coherent implementations for the DMA API. -Matt + * + * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() + * implementation. This is pulled straight from ARM and barely + * modified. -Matt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/highmem.h> +#include <linux/dma-mapping.h> + +#include <asm/tlbflush.h> + +#include "mmu_decl.h" + +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (IOREMAP_TOP) +#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void * +__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + struct ppc_vm_region *c; + unsigned long order; + u64 mask = ISA_DMA_THRESHOLD, limit; + + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + goto no_page; + } + + if ((~mask) & ISA_DMA_THRESHOLD) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (unsigned long long)ISA_DMA_THRESHOLD); + goto no_page; + } + } + + + size = PAGE_ALIGN(size); + limit = (mask + 1) & ~mask; + if ((limit && size >= limit) || + size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); + return NULL; + } + + order = get_order(size); + + /* Might be useful if we ever have a real legacy DMA zone... */ + if (mask != 0xffffffff) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + goto no_page; + + /* + * Invalidate any data that might be lurking in the + * kernel direct-mapped region for device DMA. + */ + { + unsigned long kaddr = (unsigned long)page_address(page); + memset(page_address(page), 0, size); + flush_dcache_range(kaddr, kaddr + size); + } + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + struct page *end = page + (1 << order); + + split_page(page, order); + + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); + + do { + SetPageReserved(page); + map_page(vaddr, page_to_phys(page), + pgprot_noncached(PAGE_KERNEL)); + page++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; + } + + if (page) + __free_pages(page, order); + no_page: + return NULL; +} +EXPORT_SYMBOL(__dma_alloc_coherent); + +/* + * free a page as defined by the above mapping. + */ +void __dma_free_coherent(size_t size, void *vaddr) +{ + struct ppc_vm_region *c; + unsigned long flags, addr; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + addr = c->vm_start; + do { + pte_t *ptep; + unsigned long pfn; + + ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr), + addr), + addr), + addr); + if (!pte_none(*ptep) && pte_present(*ptep)) { + pfn = pte_pfn(*ptep); + pte_clear(&init_mm, addr, ptep); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + + ClearPageReserved(page); + __free_page(page); + } + } + addr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); +} +EXPORT_SYMBOL(__dma_free_coherent); + +/* + * make an area consistent. + */ +void __dma_sync(void *vaddr, size_t size, int direction) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + + switch (direction) { + case DMA_NONE: + BUG(); + case DMA_FROM_DEVICE: + /* + * invalidate only when cache-line aligned otherwise there is + * the potential for discarding uncommitted data from the cache + */ + if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) + flush_dcache_range(start, end); + else + invalidate_dcache_range(start, end); + break; + case DMA_TO_DEVICE: /* writeback only */ + clean_dcache_range(start, end); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + flush_dcache_range(start, end); + break; + } +} +EXPORT_SYMBOL(__dma_sync); + +#ifdef CONFIG_HIGHMEM +/* + * __dma_sync_page() implementation for systems using highmem. + * In this case, each page of a buffer must be kmapped/kunmapped + * in order to have a virtual address for __dma_sync(). This must + * not sleep so kmap_atomic()/kunmap_atomic() are used. + * + * Note: yes, it is possible and correct to have a buffer extend + * beyond the first page. + */ +static inline void __dma_sync_page_highmem(struct page *page, + unsigned long offset, size_t size, int direction) +{ + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); + size_t cur_size = seg_size; + unsigned long flags, start, seg_offset = offset; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; + int seg_nr = 0; + + local_irq_save(flags); + + do { + start = (unsigned long)kmap_atomic(page + seg_nr, + KM_PPC_SYNC_PAGE) + seg_offset; + + /* Sync this buffer segment */ + __dma_sync((void *)start, seg_size, direction); + kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); + seg_nr++; + + /* Calculate next buffer segment size */ + seg_size = min((size_t)PAGE_SIZE, size - cur_size); + + /* Add the segment size to our running total */ + cur_size += seg_size; + seg_offset = 0; + } while (seg_nr < nr_segs); + + local_irq_restore(flags); +} +#endif /* CONFIG_HIGHMEM */ + +/* + * __dma_sync_page makes memory consistent. identical to __dma_sync, but + * takes a struct page instead of a virtual address + */ +void __dma_sync_page(struct page *page, unsigned long offset, + size_t size, int direction) +{ +#ifdef CONFIG_HIGHMEM + __dma_sync_page_highmem(page, offset, size, direction); +#else + unsigned long start = (unsigned long)page_address(page) + offset; + __dma_sync((void *)start, size, direction); +#endif +} +EXPORT_SYMBOL(__dma_sync_page); diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 666a5e8a5be1..3de6a0d93824 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -168,12 +168,8 @@ void __init MMU_init(void) ppc_md.progress("MMU:mapin", 0x301); mapin_ram(); -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; + /* Initialize early top-down ioremap allocator */ + ioremap_bot = IOREMAP_TOP; /* Map in I/O resources */ if (ppc_md.progress) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0602a76bf7f..579382c163a9 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -380,6 +380,23 @@ void __init mem_init(void) bsssize >> 10, initsize >> 10); +#ifdef CONFIG_PPC32 + pr_info("Kernel virtual memory layout:\n"); + pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); +#ifdef CONFIG_HIGHMEM + pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); +#endif /* CONFIG_HIGHMEM */ +#ifdef CONFIG_NOT_COHERENT_CACHE + pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", + IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); +#endif /* CONFIG_NOT_COHERENT_CACHE */ + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); + pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", + VMALLOC_START, VMALLOC_END); +#endif /* CONFIG_PPC32 */ + mem_init_done = 1; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index a70e311bd457..030d0005b4d2 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id) pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ __clear_bit(id, stale_map[cpu]); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index f5c6fd42265c..ae1d67cc090c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -219,7 +219,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, entry = do_dcache_icache_coherency(entry); changed = !pte_same(*(ptep), entry); if (changed) { - assert_pte_locked(vma->vm_mm, address); + if (!(vma->vm_flags & VM_HUGETLB)) + assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); flush_tlb_page_nohash(vma, address); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 430d0908fa50..5422169626ba 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = (-PAGE_SIZE); -EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 301855263b81..04296ffff8bf 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -592,3 +592,17 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) } return irq; } + +static void __devinit quirk_ipr_msi(struct pci_dev *dev) +{ + /* Something prevents MSIs from the IPR from working on Bimini, + * and the driver has no smarts to recover. So disable MSI + * on it for now. */ + + if (machine_is(maple)) { + dev->no_msi = 1; + dev_info(&dev->dev, "Quirk disabled MSI\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, + quirk_ipr_msi); diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 80b513449f4c..be3581a8c294 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) irq = (unsigned int)irq_map[virq].hwirq; if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; + return -1; status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } /* @@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) printk(KERN_WARNING "%s: No online cpus in the mask %s for irq %d\n", __func__, cpulist, virq); - return; + return -1; } status = rtas_call(ibm_set_xive, 3, 1, NULL, @@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } + + return 0; } static struct irq_chip xics_pic_direct = { diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 0efc12d1a3d7..352d8c3ef526 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); } + + return 0; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 3cef2af10f42..eff433c322a0 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index 39e46919df14..f2a29641b6a3 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -263,6 +263,9 @@ static int camera_probe(void) struct i2c_msg msg; int ret; + if (!a) + return -ENODEV; + camera_power(1); msg.addr = 0x6e; msg.buf = camera_ncm03j_magic; diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index d3b2b4f109e3..5d84df5e27f6 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -12,7 +12,6 @@ #ifndef __ASM_SH_FLAT_H #define __ASM_SH_FLAT_H -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 425c2f9be6d5..d42e393078c4 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -208,8 +208,9 @@ do { unsigned long new_flags = current_thread_info()->flags; \ else \ clear_thread_flag(TIF_ABI_PENDING); \ /* flush_thread will update pgd cache */ \ - if (current->personality != PER_LINUX32) \ - set_personality(PER_LINUX); \ + if (personality(current->personality) != PER_LINUX32) \ + set_personality(PER_LINUX | \ + (current->personality & (~PER_MASK))); \ } while (0) #endif /* !(__ASM_SPARC64_ELF_H) */ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 639ac805448a..65865726b283 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -102,8 +102,8 @@ struct thread_info { #define TI_KERN_CNTD1 0x00000488 #define TI_PCR 0x00000490 #define TI_RESTART_BLOCK 0x00000498 -#define TI_KUNA_REGS 0x000004c0 -#define TI_KUNA_INSN 0x000004c8 +#define TI_KUNA_REGS 0x000004c8 +#define TI_KUNA_INSN 0x000004d0 #define TI_FPREGS 0x00000500 /* We embed this in the uppermost byte of thread_info->flags */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 5deabe921a47..e5e78f9cfc95 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, +static int sun4u_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { sun4u_irq_enable(virt_irq); + + return 0; } /* Don't do anything. The desc->status check for IRQ_DISABLED in @@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, +static int sun4v_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; @@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq, if (err != HV_EOK) printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " "err(%d)\n", ino, cpuid, err); + + return 0; } static void sun4v_irq_disable(unsigned int virt_irq) @@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, +static int sun4v_virt_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; @@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq, printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " "err(%d)\n", dev_handle, dev_ino, cpuid, err); + + return 0; } static void sun4v_virq_disable(unsigned int virt_irq) diff --git a/arch/sparc/lib/csum_copy_from_user.S b/arch/sparc/lib/csum_copy_from_user.S index a22eddbe5dba..e0304e6a2242 100644 --- a/arch/sparc/lib/csum_copy_from_user.S +++ b/arch/sparc/lib/csum_copy_from_user.S @@ -5,7 +5,7 @@ #define EX_LD(x) \ 98: x; \ - .section .fixup; \ + .section .fixup, "ax"; \ .align 4; \ 99: retl; \ mov -1, %o0; \ diff --git a/arch/sparc/lib/csum_copy_to_user.S b/arch/sparc/lib/csum_copy_to_user.S index d5b12f441f02..afd01acc587c 100644 --- a/arch/sparc/lib/csum_copy_to_user.S +++ b/arch/sparc/lib/csum_copy_to_user.S @@ -5,7 +5,7 @@ #define EX_ST(x) \ 98: x; \ - .section .fixup; \ + .section .fixup,"ax"; \ .align 4; \ 99: retl; \ mov -1, %o0; \ diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild new file mode 100644 index 000000000000..ad8ec356fb36 --- /dev/null +++ b/arch/x86/Kbuild @@ -0,0 +1,16 @@ + +obj-$(CONFIG_KVM) += kvm/ + +# Xen paravirtualization support +obj-$(CONFIG_XEN) += xen/ + +# lguest paravirtualization support +obj-$(CONFIG_LGUEST_GUEST) += lguest/ + +obj-y += kernel/ +obj-y += mm/ + +obj-y += crypto/ +obj-y += vdso/ +obj-$(CONFIG_IA32_EMULATION) += ia32/ + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885eee14..aafae3b140de 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -47,6 +47,11 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA +config OUTPUT_FORMAT + string + default "elf32-i386" if X86_32 + default "elf64-x86-64" if X86_64 + config ARCH_DEFCONFIG string default "arch/x86/configs/i386_defconfig" if X86_32 @@ -274,15 +279,9 @@ config SPARSE_IRQ If you don't know what to do here, say N. -config NUMA_MIGRATE_IRQ_DESC - bool "Move irq desc when changing irq smp_affinity" +config NUMA_IRQ_DESC + def_bool y depends on SPARSE_IRQ && NUMA - depends on BROKEN - default n - ---help--- - This enables moving irq_desc to cpu/node that irq will use handled. - - If you don't know what to do here, say N. config X86_MPPARSE bool "Enable MPS table" if ACPI @@ -355,7 +354,7 @@ config X86_UV depends on X86_64 depends on X86_EXTENDED_PLATFORM depends on NUMA - select X86_X2APIC + depends on X86_X2APIC ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. @@ -498,6 +497,19 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_SPINLOCKS + bool "Paravirtualization layer for spinlocks" + depends on PARAVIRT && SMP && EXPERIMENTAL + ---help--- + Paravirtualized spinlocks allow a pvops backend to replace the + spinlock implementation with something virtualization-friendly + (for example, block the virtual CPU rather than spinning). + + Unfortunately the downside is an up to 5% performance hit on + native kernels, with various workloads. + + If you are unsure how to answer this question, answer N. + config PARAVIRT_CLOCK bool default n @@ -1453,9 +1465,7 @@ config KEXEC_JUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) - default "0x1000000" if X86_NUMAQ - default "0x200000" if X86_64 - default "0x100000" + default "0x1000000" ---help--- This gives the physical address where the kernel is loaded. @@ -1474,15 +1484,15 @@ config PHYSICAL_START to be specifically compiled to run from a specific memory area (normally a reserved region) and this option comes handy. - So if you are using bzImage for capturing the crash dump, leave - the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. - Otherwise if you plan to use vmlinux for capturing the crash dump - change this value to start of the reserved region (Typically 16MB - 0x1000000). In other words, it can be set based on the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel. Typically this parameter is set as - crashkernel=64M@16M. Please take a look at - Documentation/kdump/kdump.txt for more details about crash dumps. + So if you are using bzImage for capturing the crash dump, + leave the value here unchanged to 0x1000000 and set + CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux + for capturing the crash dump change this value to start of + the reserved region. In other words, it can be set based on + the "X" value as specified in the "crashkernel=YM@XM" + command line boot parameter passed to the panic-ed + kernel. Please take a look at Documentation/kdump/kdump.txt + for more details about crash dumps. Usage of bzImage for capturing the crash dump is recommended as one does not have to build two kernels. Same kernel can be used @@ -1495,8 +1505,8 @@ config PHYSICAL_START Don't change this unless you know what you are doing. config RELOCATABLE - bool "Build a relocatable kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Build a relocatable kernel" + default y ---help--- This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. @@ -1511,12 +1521,16 @@ config RELOCATABLE it has been loaded at and the compile time physical address (CONFIG_PHYSICAL_START) is ignored. +# Relocation on x86-32 needs some additional build support +config X86_NEED_RELOCS + def_bool y + depends on X86_32 && RELOCATABLE + config PHYSICAL_ALIGN hex prompt "Alignment value to which kernel should be aligned" if X86_32 - default "0x100000" if X86_32 - default "0x200000" if X86_64 - range 0x2000 0x400000 + default "0x1000000" + range 0x2000 0x1000000 ---help--- This value puts the alignment restrictions on physical address where kernel is loaded and run from. Kernel is compiled for an diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 22b752e09487..d105f29bb6bb 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -159,10 +159,17 @@ config IOMMU_DEBUG options. See Documentation/x86_64/boot-options.txt for more details. +config IOMMU_STRESS + bool "Enable IOMMU stress-test mode" + ---help--- + This option disables various optimizations in IOMMU related + code to do real stress testing of the IOMMU code. This option + will cause a performance drop and should only be enabled for + testing. + config IOMMU_LEAK bool "IOMMU leak tracing" - depends on DEBUG_KERNEL - depends on IOMMU_DEBUG + depends on IOMMU_DEBUG && DMA_API_DEBUG ---help--- Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8c86b72afdc2..edbd0ca62067 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -7,8 +7,6 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif -core-$(CONFIG_KVM) += arch/x86/kvm/ - # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. # e.g.: obj-y += foo_$(BITS).o @@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ -# Sub architecture files that needs linking first -core-y += $(fcore-y) - -# Xen paravirtualization support -core-$(CONFIG_XEN) += arch/x86/xen/ - -# lguest paravirtualization support -core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ - -core-y += arch/x86/kernel/ -core-y += arch/x86/mm/ - -core-y += arch/x86/crypto/ -core-y += arch/x86/vdso/ -core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ +# See arch/x86/Kbuild for content of core part of the kernel +core-y += arch/x86/ # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 172cf8a98bdd..851fe936d242 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -3,6 +3,8 @@ bzImage cpustr.h mkcpustr offsets.h +voffset.h +zoffset.h setup setup.bin setup.elf diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 6633b6e7505a..8d16ada25048 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -26,9 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o -setup-y += printf.o string.o tty.o video.o video-mode.o version.o +setup-y += printf.o regs.o string.o tty.o video.o video-mode.o +setup-y += version.o setup-$(CONFIG_X86_APM_BOOT) += apm.o # The link order of the video-*.o modules can matter. In particular, @@ -86,19 +87,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-offsets := -e 's/^00*/0/' \ - -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p' -quiet_cmd_offsets = OFFSETS $@ - cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@ +quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ -$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE - $(call if_changed,offsets) +targets += voffset.h +$(obj)/voffset.h: vmlinux FORCE + $(call if_changed,voffset) + +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' + +quiet_cmd_zoffset = ZOFFSET $@ + cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ + +targets += zoffset.h +$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE + $(call if_changed,zoffset) -targets += offsets.h AFLAGS_header.o += -I$(obj) -$(obj)/header.o: $(obj)/offsets.h +$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h LDFLAGS_setup.elf := -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 7c19ce8c2442..64a31a6d751a 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -2,7 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved - * Copyright 2009 Intel Corporation + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -90,8 +90,11 @@ static int a20_test_long(void) static void enable_a20_bios(void) { - asm volatile("pushfl; int $0x15; popfl" - : : "a" ((u16)0x2401)); + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0x2401; + intcall(0x15, &ireg, NULL); } static void enable_a20_kbc(void) diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index 7aa6033001f9..ee274834ea8b 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * Original APM BIOS checking by Stephen Rothwell, May 1994 * (sfr@canb.auug.org.au) @@ -19,75 +20,56 @@ int query_apm_bios(void) { - u16 ax, bx, cx, dx, di; - u32 ebx, esi; - u8 err; + struct biosregs ireg, oreg; /* APM BIOS installation check */ - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x53; + intcall(0x15, &ireg, &oreg); - if (err) + if (oreg.flags & X86_EFLAGS_CF) return -1; /* No APM BIOS */ - if (bx != 0x504d) /* "PM" signature */ + if (oreg.bx != 0x504d) /* "PM" signature */ return -1; - if (!(cx & 0x02)) /* 32 bits supported? */ + if (!(oreg.cx & 0x02)) /* 32 bits supported? */ return -1; /* Disconnect first, just in case */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - /* Paranoia */ - ebx = esi = 0; - cx = dx = di = 0; + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); /* 32-bit connect */ - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" - : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), - "+S" (esi), "+D" (di), "=m" (err) - : "a" (0x5303)); - - boot_params.apm_bios_info.cseg = ax; - boot_params.apm_bios_info.offset = ebx; - boot_params.apm_bios_info.cseg_16 = cx; - boot_params.apm_bios_info.dseg = dx; - boot_params.apm_bios_info.cseg_len = (u16)esi; - boot_params.apm_bios_info.cseg_16_len = esi >> 16; - boot_params.apm_bios_info.dseg_len = di; - - if (err) + ireg.al = 0x03; + intcall(0x15, &ireg, &oreg); + + boot_params.apm_bios_info.cseg = oreg.ax; + boot_params.apm_bios_info.offset = oreg.ebx; + boot_params.apm_bios_info.cseg_16 = oreg.cx; + boot_params.apm_bios_info.dseg = oreg.dx; + boot_params.apm_bios_info.cseg_len = oreg.si; + boot_params.apm_bios_info.cseg_16_len = oreg.hsi; + boot_params.apm_bios_info.dseg_len = oreg.di; + + if (oreg.flags & X86_EFLAGS_CF) return -1; /* Redo the installation check as the 32-bit connect; some BIOSes return different flags this way... */ - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); + ireg.al = 0x00; + intcall(0x15, &ireg, &oreg); - if (err || bx != 0x504d) { + if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) { /* Failure with 32-bit connect, try to disconect and ignore */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); return -1; } - boot_params.apm_bios_info.version = ax; - boot_params.apm_bios_info.flags = cx; + boot_params.apm_bios_info.version = oreg.ax; + boot_params.apm_bios_info.flags = oreg.cx; return 0; } diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S new file mode 100644 index 000000000000..507793739ea5 --- /dev/null +++ b/arch/x86/boot/bioscall.S @@ -0,0 +1,82 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes + * touching registers they shouldn't be. + */ + + .code16 + .text + .globl intcall + .type intcall, @function +intcall: + /* Self-modify the INT instruction. Ugly, but works. */ + cmpb %al, 3f + je 1f + movb %al, 3f + jmp 1f /* Synchronize pipeline */ +1: + /* Save state */ + pushfl + pushw %fs + pushw %gs + pushal + + /* Copy input state to stack frame */ + subw $44, %sp + movw %dx, %si + movw %sp, %di + movw $11, %cx + rep; movsd + + /* Pop full state from the stack */ + popal + popw %gs + popw %fs + popw %es + popw %ds + popfl + + /* Actual INT */ + .byte 0xcd /* INT opcode */ +3: .byte 0 + + /* Push full state to the stack */ + pushfl + pushw %ds + pushw %es + pushw %fs + pushw %gs + pushal + + /* Re-establish C environment invariants */ + cld + movzwl %sp, %esp + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + + /* Copy output state from stack frame */ + movw 68(%esp), %di /* Original %cx == 3rd argument */ + andw %di, %di + jz 4f + movw %sp, %si + movw $11, %cx + rep; movsd +4: addw $44, %sp + + /* Restore state and return */ + popal + popw %gs + popw %fs + popfl + retl + .size intcall, .-intcall diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 7b2692e897e5..98239d2658f2 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -26,6 +27,7 @@ #include <asm/setup.h> #include "bitops.h" #include <asm/cpufeature.h> +#include <asm/processor-flags.h> /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -241,6 +243,49 @@ int enable_a20(void); /* apm.c */ int query_apm_bios(void); +/* bioscall.c */ +struct biosregs { + union { + struct { + u32 edi; + u32 esi; + u32 ebp; + u32 _esp; + u32 ebx; + u32 edx; + u32 ecx; + u32 eax; + u32 _fsgs; + u32 _dses; + u32 eflags; + }; + struct { + u16 di, hdi; + u16 si, hsi; + u16 bp, hbp; + u16 _sp, _hsp; + u16 bx, hbx; + u16 dx, hdx; + u16 cx, hcx; + u16 ax, hax; + u16 gs, fs; + u16 es, ds; + u16 flags, hflags; + }; + struct { + u8 dil, dih, edi2, edi3; + u8 sil, sih, esi2, esi3; + u8 bpl, bph, ebp2, ebp3; + u8 _spl, _sph, _esp2, _esp3; + u8 bl, bh, ebx2, ebx3; + u8 dl, dh, edx2, edx3; + u8 cl, ch, ecx2, ecx3; + u8 al, ah, eax2, eax3; + }; + }; +}; +void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); + /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); @@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...); int vsprintf(char *buf, const char *fmt, va_list args); int printf(const char *fmt, ...); +/* regs.c */ +void initregs(struct biosregs *regs); + /* string.c */ int strcmp(const char *str1, const char *str2); size_t strnlen(const char *s, size_t maxlen); diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index 63eff3b04d01..4a46fab7162e 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -1,3 +1,6 @@ relocs vmlinux.bin.all vmlinux.relocs +vmlinux.lds +mkpiggy +piggy.S diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 65551c9f8571..49c8a4c37d7c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -19,7 +19,9 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ LDFLAGS := -m elf_$(UTS_MACHINE) LDFLAGS_vmlinux := -T -$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE +hostprogs-y := mkpiggy + +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: @@ -29,7 +31,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += vmlinux.bin.all vmlinux.relocs relocs -hostprogs-$(CONFIG_X86_32) += relocs +hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< @@ -37,46 +39,22 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin -vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs -quiet_cmd_relocbin = BUILD $@ - cmd_relocbin = cat $(filter-out FORCE,$^) > $@ -$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE - $(call if_changed,relocbin) - -ifeq ($(CONFIG_X86_32),y) +vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs -ifdef CONFIG_RELOCATABLE -$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE - $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE - $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE - $(call if_changed,lzma) -else -$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -endif -LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T -else +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZMA) := lzma -$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE - $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE - $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE - $(call if_changed,lzma) - -LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T -endif +quiet_cmd_mkpiggy = MKPIGGY $@ + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) -suffix_$(CONFIG_KERNEL_GZIP) = gz -suffix_$(CONFIG_KERNEL_BZIP2) = bz2 -suffix_$(CONFIG_KERNEL_LZMA) = lzma - -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE - $(call if_changed,ld) +targets += piggy.S +$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE + $(call if_changed,mkpiggy) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3a8a866fb2e2..75e4f001e706 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -12,16 +12,16 @@ * the page directory. [According to comments etc elsewhere on a compressed * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] * - * Page 0 is deliberately kept safe, since System Management Mode code in + * Page 0 is deliberately kept safe, since System Management Mode code in * laptops may need to access the BIOS data stored there. This is also - * useful for future device drivers that either access the BIOS via VM86 + * useful for future device drivers that either access the BIOS via VM86 * mode. */ /* * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ -.text + .text #include <linux/linkage.h> #include <asm/segment.h> @@ -29,161 +29,151 @@ #include <asm/boot.h> #include <asm/asm-offsets.h> -.section ".text.head","ax",@progbits + .section ".text.head","ax",@progbits ENTRY(startup_32) cld - /* test KEEP_SEGMENTS flag to see if the bootloader is asking - * us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) - jnz 1f + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f cli - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl %eax,%ss + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss 1: -/* Calculate the delta between where we were compiled to run +/* + * Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done * with a short local call on x86. Nothing else will tell us what * address we are running at. The reserved chunk of the real-mode * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (0x1e4+4)(%esi), %esp - call 1f -1: popl %ebp - subl $1b, %ebp + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp -/* %ebp contains the address we are loaded at by the boot loader and %ebx +/* + * %ebp contains the address we are loaded at by the boot loader and %ebx * contains the address where we should move the kernel image temporarily * for safe in-place decompression. */ #ifdef CONFIG_RELOCATABLE - movl %ebp, %ebx - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx + movl %ebp, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $LOAD_PHYSICAL_ADDR, %ebx #endif - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx - /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx - -/* Copy the compressed kernel to the end of our buffer + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx + + /* Set up the stack */ + leal boot_stack_end(%ebx), %esp + + /* Zero EFLAGS */ + pushl $0 + popfl + +/* + * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushl %esi - leal _end(%ebp), %esi - leal _end(%ebx), %edi - movl $(_end - startup_32), %ecx + pushl %esi + leal (_bss-4)(%ebp), %esi + leal (_bss-4)(%ebx), %edi + movl $(_bss - startup_32), %ecx + shrl $2, %ecx std - rep - movsb + rep movsl cld - popl %esi - -/* Compute the kernel start address. - */ -#ifdef CONFIG_RELOCATABLE - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp -#else - movl $LOAD_PHYSICAL_ADDR, %ebp -#endif + popl %esi /* * Jump to the relocated address. */ - leal relocated(%ebx), %eax - jmp *%eax + leal relocated(%ebx), %eax + jmp *%eax ENDPROC(startup_32) -.section ".text" + .text relocated: /* - * Clear BSS - */ - xorl %eax,%eax - leal _edata(%ebx),%edi - leal _end(%ebx), %ecx - subl %edi,%ecx - cld - rep - stosb - -/* - * Setup the stack for the decompressor + * Clear BSS (stack is currently empty) */ - leal boot_stack_end(%ebx), %esp + xorl %eax, %eax + leal _bss(%ebx), %edi + leal _ebss(%ebx), %ecx + subl %edi, %ecx + shrl $2, %ecx + rep stosl /* * Do the decompression, and jump to the new kernel.. */ - movl output_len(%ebx), %eax - pushl %eax - # push arguments for decompress_kernel: - pushl %ebp # output address - movl input_len(%ebx), %eax - pushl %eax # input_len - leal input_data(%ebx), %eax - pushl %eax # input_data - leal boot_heap(%ebx), %eax - pushl %eax # heap area - pushl %esi # real mode pointer - call decompress_kernel - addl $20, %esp - popl %ecx + leal z_extract_offset_negative(%ebx), %ebp + /* push arguments for decompress_kernel: */ + pushl %ebp /* output address */ + pushl $z_input_len /* input_len */ + leal input_data(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call decompress_kernel + addl $20, %esp #if CONFIG_RELOCATABLE -/* Find the address of the relocations. +/* + * Find the address of the relocations. */ - movl %ebp, %edi - addl %ecx, %edi + leal z_output_len(%ebp), %edi -/* Calculate the delta between where vmlinux was compiled to run +/* + * Calculate the delta between where vmlinux was compiled to run * and where it was actually loaded. */ - movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx - jz 2f /* Nothing to be done if loaded at compiled addr. */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. */ -1: subl $4, %edi - movl 0(%edi), %ecx - testl %ecx, %ecx - jz 2f - addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) - jmp 1b +1: subl $4, %edi + movl (%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b 2: #endif /* * Jump to the decompressed kernel. */ - xorl %ebx,%ebx - jmp *%ebp + xorl %ebx, %ebx + jmp *%ebp -.bss -/* Stack and heap for uncompression */ -.balign 4 +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 boot_heap: .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index ed4a82948002..f62c284db9eb 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -21,8 +21,8 @@ /* * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ -.code32 -.text + .code32 + .text #include <linux/linkage.h> #include <asm/segment.h> @@ -33,12 +33,14 @@ #include <asm/processor-flags.h> #include <asm/asm-offsets.h> -.section ".text.head" + .section ".text.head" .code32 ENTRY(startup_32) cld - /* test KEEP_SEGMENTS flag to see if the bootloader is asking - * us to not reload segments */ + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ testb $(1<<6), BP_loadflags(%esi) jnz 1f @@ -49,14 +51,15 @@ ENTRY(startup_32) movl %eax, %ss 1: -/* Calculate the delta between where we were compiled to run +/* + * Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done * with a short local call on x86. Nothing else will tell us what * address we are running at. The reserved chunk of the real-mode * data at 0x1e4 (defined as a scratch field) are used as the stack * for this calculation. Only 4 bytes are needed. */ - leal (0x1e4+4)(%esi), %esp + leal (BP_scratch+4)(%esi), %esp call 1f 1: popl %ebp subl $1b, %ebp @@ -70,32 +73,28 @@ ENTRY(startup_32) testl %eax, %eax jnz no_longmode -/* Compute the delta between where we were compiled to run at +/* + * Compute the delta between where we were compiled to run at * and where the code will actually run at. - */ -/* %ebp contains the address we are loaded at by the boot loader and %ebx + * + * %ebp contains the address we are loaded at by the boot loader and %ebx * contains the address where we should move the kernel image temporarily * for safe in-place decompression. */ #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(PMD_PAGE_SIZE -1), %ebx - andl $PMD_PAGE_MASK, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx #else - movl $CONFIG_PHYSICAL_START, %ebx + movl $LOAD_PHYSICAL_ADDR, %ebx #endif - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ - addl $(32768 + 18 + 4095), %ebx - andl $~4095, %ebx + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx /* * Prepare for entering 64 bit mode @@ -114,7 +113,7 @@ ENTRY(startup_32) /* * Build early 4G boot pagetable */ - /* Initialize Page tables to 0*/ + /* Initialize Page tables to 0 */ leal pgtable(%ebx), %edi xorl %eax, %eax movl $((4096*6)/4), %ecx @@ -155,7 +154,8 @@ ENTRY(startup_32) btsl $_EFER_LME, %eax wrmsr - /* Setup for the jump to 64bit mode + /* + * Setup for the jump to 64bit mode * * When the jump is performend we will be in long mode but * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 @@ -184,7 +184,8 @@ no_longmode: #include "../../kernel/verify_cpu_64.S" - /* Be careful here startup_64 needs to be at a predictable + /* + * Be careful here startup_64 needs to be at a predictable * address so I can export it in an ELF header. Bootloaders * should look at the ELF header to find this address, as * it may change in the future. @@ -192,7 +193,8 @@ no_longmode: .code64 .org 0x200 ENTRY(startup_64) - /* We come here either from startup_32 or directly from a + /* + * We come here either from startup_32 or directly from a * 64bit bootloader. If we come here from a bootloader we depend on * an identity mapped page table being provied that maps our * entire text+data+bss and hopefully all of memory. @@ -209,50 +211,54 @@ ENTRY(startup_64) movl $0x20, %eax ltr %ax - /* Compute the decompressed kernel start address. It is where + /* + * Compute the decompressed kernel start address. It is where * we were loaded at aligned to a 2M boundary. %rbp contains the * decompressed kernel start address. * * If it is a relocatable kernel then decompress and run the kernel * from load address aligned to 2MB addr, otherwise decompress and - * run the kernel from CONFIG_PHYSICAL_START + * run the kernel from LOAD_PHYSICAL_ADDR + * + * We cannot rely on the calculation done in 32-bit mode, since we + * may have been invoked via the 64-bit entry point. */ /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - addq $(PMD_PAGE_SIZE - 1), %rbp - andq $PMD_PAGE_MASK, %rbp - movq %rbp, %rbx + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp + notq %rax + andq %rax, %rbp #else - movq $CONFIG_PHYSICAL_START, %rbp - movq %rbp, %rbx + movq $LOAD_PHYSICAL_ADDR, %rbp #endif - /* Replace the compressed data size with the uncompressed size */ - movl input_len(%rip), %eax - subq %rax, %rbx - movl output_len(%rip), %eax - addq %rax, %rbx - /* Add 8 bytes for every 32K input block */ - shrq $12, %rax - addq %rax, %rbx - /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ - addq $(32768 + 18 + 4095), %rbx - andq $~4095, %rbx - -/* Copy the compressed kernel to the end of our buffer + /* Target address to relocate to for decompression */ + leaq z_extract_offset(%rbp), %rbx + + /* Set up the stack */ + leaq boot_stack_end(%rbx), %rsp + + /* Zero EFLAGS */ + pushq $0 + popfq + +/* + * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _end_before_pgt(%rip), %r8 - leaq _end_before_pgt(%rbx), %r9 - movq $_end_before_pgt /* - $startup_32 */, %rcx -1: subq $8, %r8 - subq $8, %r9 - movq 0(%r8), %rax - movq %rax, 0(%r9) - subq $8, %rcx - jnz 1b + pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq (_bss-8)(%rbx), %rdi + movq $_bss /* - $startup_32 */, %rcx + shrq $3, %rcx + std + rep movsq + cld + popq %rsi /* * Jump to the relocated address. @@ -260,37 +266,28 @@ ENTRY(startup_64) leaq relocated(%rbx), %rax jmp *%rax -.section ".text" + .text relocated: /* - * Clear BSS + * Clear BSS (stack is currently empty) */ - xorq %rax, %rax - leaq _edata(%rbx), %rdi - leaq _end_before_pgt(%rbx), %rcx + xorl %eax, %eax + leaq _bss(%rip), %rdi + leaq _ebss(%rip), %rcx subq %rdi, %rcx - cld - rep - stosb - - /* Setup the stack */ - leaq boot_stack_end(%rip), %rsp - - /* zero EFLAGS after setting rsp */ - pushq $0 - popfq + shrq $3, %rcx + rep stosq /* * Do the decompression, and jump to the new kernel.. */ - pushq %rsi # Save the real mode argument - movq %rsi, %rdi # real mode address - leaq boot_heap(%rip), %rsi # malloc area for uncompression - leaq input_data(%rip), %rdx # input_data - movl input_len(%rip), %eax - movq %rax, %rcx # input_len - movq %rbp, %r8 # output + pushq %rsi /* Save the real mode argument */ + movq %rsi, %rdi /* real mode address */ + leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ + leaq input_data(%rip), %rdx /* input_data */ + movl $z_input_len, %ecx /* input_len */ + movq %rbp, %r8 /* output target address */ call decompress_kernel popq %rsi @@ -311,11 +308,21 @@ gdt: .quad 0x0000000000000000 /* TS continued */ gdt_end: -.bss -/* Stack and heap for uncompression */ -.balign 4 +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 boot_heap: .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: + +/* + * Space for page tables (not in .bss so not zeroed) + */ + .section ".pgtable","a",@nobits + .balign 4096 +pgtable: + .fill 6*4096, 1, 0 diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e45be73684ff..842b2a36174a 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -325,21 +325,19 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) + error("Destination address inappropriately aligned"); #ifdef CONFIG_X86_64 - if ((unsigned long)output & (__KERNEL_ALIGN - 1)) - error("Destination address not 2M aligned"); - if ((unsigned long)output >= 0xffffffffffUL) + if (heap > 0x3fffffffffffUL) error("Destination address too large"); #else - if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1)) - error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) error("Destination address too large"); +#endif #ifndef CONFIG_RELOCATABLE - if ((u32)output != LOAD_PHYSICAL_ADDR) + if ((unsigned long)output != LOAD_PHYSICAL_ADDR) error("Wrong destination address"); #endif -#endif if (!quiet) putstr("\nDecompressing Linux... "); diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 000000000000..bcbd36c41432 --- /dev/null +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -0,0 +1,97 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * H. Peter Anvin <hpa@linux.intel.com> + * + * ----------------------------------------------------------------------- */ + +/* + * Compute the desired load offset from a compressed program; outputs + * a small assembly wrapper with the appropriate symbols defined. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +static uint32_t getle32(const void *p) +{ + const uint8_t *cp = p; + + return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) + + ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24); +} + +int main(int argc, char *argv[]) +{ + uint32_t olen; + long ilen; + unsigned long offs; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); + return 1; + } + + /* Get the information for the compressed kernel image first */ + + f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + return 1; + } + + + if (fseek(f, -4L, SEEK_END)) { + perror(argv[1]); + } + fread(&olen, sizeof olen, 1, f); + ilen = ftell(f); + olen = getle32(&olen); + fclose(f); + + /* + * Now we have the input (compressed) and output (uncompressed) + * sizes, compute the necessary decompression offset... + */ + + offs = (olen > ilen) ? olen - ilen : 0; + offs += olen >> 12; /* Add 8 bytes for each 32K block */ + offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */ + offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ + + printf(".section \".rodata.compressed\",\"a\",@progbits\n"); + printf(".globl z_input_len\n"); + printf("z_input_len = %lu\n", ilen); + printf(".globl z_output_len\n"); + printf("z_output_len = %lu\n", (unsigned long)olen); + printf(".globl z_extract_offset\n"); + printf("z_extract_offset = 0x%lx\n", offs); + /* z_extract_offset_negative allows simplification of head_32.S */ + printf(".globl z_extract_offset_negative\n"); + printf("z_extract_offset_negative = -0x%lx\n", offs); + + printf(".globl input_data, input_data_end\n"); + printf("input_data:\n"); + printf(".incbin \"%s\"\n", argv[1]); + printf("input_data_end:\n"); + + return 0; +} diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index 857e492c571e..bbeb0c3fbd90 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -504,8 +504,11 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) if (sym->st_shndx == SHN_ABS) { continue; } - if (r_type == R_386_PC32) { - /* PC relative relocations don't need to be adjusted */ + if (r_type == R_386_NONE || r_type == R_386_PC32) { + /* + * NONE can be ignored and and PC relative + * relocations don't need to be adjusted. + */ } else if (r_type == R_386_32) { /* Visit relocations that need to be adjusted */ diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux.lds.S index bef1ac891bce..cc353e1b3ffd 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -1,6 +1,17 @@ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#undef i386 + +#include <asm/page_types.h> + +#ifdef CONFIG_X86_64 OUTPUT_ARCH(i386:x86-64) ENTRY(startup_64) +#else +OUTPUT_ARCH(i386) +ENTRY(startup_32) +#endif + SECTIONS { /* Be careful parts of head_64.S assume startup_32 is at @@ -33,16 +44,22 @@ SECTIONS *(.data.*) _edata = . ; } + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); .bss : { _bss = . ; *(.bss) *(.bss.*) *(COMMON) - . = ALIGN(8); - _end_before_pgt = . ; - . = ALIGN(4096); - pgtable = . ; - . = . + 4096 * 6; + . = ALIGN(8); /* For convenience during zeroing */ _ebss = .; } +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + .pgtable : { + _pgtable = . ; + *(.pgtable) + _epgtable = . ; + } +#endif + _end = .; } diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr deleted file mode 100644 index f02382ae5c48..000000000000 --- a/arch/x86/boot/compressed/vmlinux.scr +++ /dev/null @@ -1,10 +0,0 @@ -SECTIONS -{ - .rodata.compressed : { - input_len = .; - LONG(input_data_end - input_data) input_data = .; - *(.data) - output_len = . - 4; - input_data_end = .; - } -} diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds deleted file mode 100644 index bb3c48379c40..000000000000 --- a/arch/x86/boot/compressed/vmlinux_32.lds +++ /dev/null @@ -1,43 +0,0 @@ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(startup_32) -SECTIONS -{ - /* Be careful parts of head_32.S assume startup_32 is at - * address 0. - */ - . = 0; - .text.head : { - _head = . ; - *(.text.head) - _ehead = . ; - } - .rodata.compressed : { - *(.rodata.compressed) - } - .text : { - _text = .; /* Text */ - *(.text) - *(.text.*) - _etext = . ; - } - .rodata : { - _rodata = . ; - *(.rodata) /* read-only data */ - *(.rodata.*) - _erodata = . ; - } - .data : { - _data = . ; - *(.data) - *(.data.*) - _edata = . ; - } - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - _end = . ; - } -} diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 1aae8f3e5ca1..c501a5b466f8 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -22,17 +23,17 @@ */ static int read_mbr(u8 devno, void *buf) { - u16 ax, bx, cx, dx; + struct biosregs ireg, oreg; - ax = 0x0201; /* Legacy Read, one sector */ - cx = 0x0001; /* Sector 0-0-1 */ - dx = devno; - bx = (size_t)buf; - asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx) - : : "esi", "edi", "memory"); + initregs(&ireg); + ireg.ax = 0x0201; /* Legacy Read, one sector */ + ireg.cx = 0x0001; /* Sector 0-0-1 */ + ireg.dl = devno; + ireg.bx = (size_t)buf; - return -(u8)ax; /* 0 or -1 */ + intcall(0x13, &ireg, &oreg); + + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ } static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) @@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) static int get_edd_info(u8 devno, struct edd_info *ei) { - u16 ax, bx, cx, dx, di; + struct biosregs ireg, oreg; memset(ei, 0, sizeof *ei); /* Check Extensions Present */ - ax = 0x4100; - bx = EDDMAGIC1; - dx = devno; - asm("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) - : : "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x41; + ireg.bx = EDDMAGIC1; + ireg.dl = devno; + intcall(0x13, &ireg, &oreg); - if ((u8)ax) + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* No extended information */ - if (bx != EDDMAGIC2) + if (oreg.bx != EDDMAGIC2) return -1; ei->device = devno; - ei->version = ax >> 8; /* EDD version number */ - ei->interface_support = cx; /* EDD functionality subsets */ + ei->version = oreg.ah; /* EDD version number */ + ei->interface_support = oreg.cx; /* EDD functionality subsets */ /* Extended Get Device Parameters */ ei->params.length = sizeof(ei->params); - ax = 0x4800; - dx = devno; - asm("pushfl; int $0x13; popfl" - : "+a" (ax), "+d" (dx), "=m" (ei->params) - : "S" (&ei->params) - : "ebx", "ecx", "edi"); + ireg.ah = 0x48; + ireg.si = (size_t)&ei->params; + intcall(0x13, &ireg, &oreg); /* Get legacy CHS parameters */ /* Ralf Brown recommends setting ES:DI to 0:0 */ - ax = 0x0800; - dx = devno; - di = 0; - asm("pushw %%es; " - "movw %%di,%%es; " - "pushfl; stc; int $0x13; setc %%al; popfl; " - "popw %%es" - : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di) - : : "esi"); - - if ((u8)ax == 0) { - ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2); - ei->legacy_max_head = dx >> 8; - ei->legacy_sectors_per_track = cx & 0x3f; + ireg.ah = 0x08; + ireg.es = 0; + intcall(0x13, &ireg, &oreg); + + if (!(oreg.eflags & X86_EFLAGS_CF)) { + ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2); + ei->legacy_max_head = oreg.dh; + ei->legacy_sectors_per_track = oreg.cl & 0x3f; } return 0; diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 5d84d1c74e4c..b31cc54b4641 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -22,7 +22,8 @@ #include <asm/page_types.h> #include <asm/setup.h> #include "boot.h" -#include "offsets.h" +#include "voffset.h" +#include "zoffset.h" BOOTSEG = 0x07C0 /* original address of boot-sector */ SYSSEG = 0x1000 /* historical load address >> 4 */ @@ -115,7 +116,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0209 # header version number (>= 0x0105) + .word 0x020a # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -168,7 +169,11 @@ heap_end_ptr: .word _end+STACK_SIZE-512 # end of setup code can be used by setup # for local heap purposes. -pad1: .word 0 +ext_loader_ver: + .byte 0 # Extended boot loader version +ext_loader_type: + .byte 0 # Extended boot loader type + cmd_line_ptr: .long 0 # (Header version 0x0202 or later) # If nonzero, a 32-bit pointer # to the kernel command line. @@ -200,7 +205,7 @@ relocatable_kernel: .byte 1 #else relocatable_kernel: .byte 0 #endif -pad2: .byte 0 +min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment pad3: .word 0 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, @@ -212,16 +217,27 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07 hardware_subarch_data: .quad 0 -payload_offset: .long input_data -payload_length: .long input_data_end-input_data +payload_offset: .long ZO_input_data +payload_length: .long ZO_z_input_len setup_data: .quad 0 # 64-bit physical pointer to # single linked list of # struct setup_data +pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +#define VO_INIT_SIZE (VO__end - VO__text) +#if ZO_INIT_SIZE > VO_INIT_SIZE +#define INIT_SIZE ZO_INIT_SIZE +#else +#define INIT_SIZE VO_INIT_SIZE +#endif +init_size: .long INIT_SIZE # kernel initialization size + # End of setup header ##################################################### - .section ".inittext", "ax" + .section ".entrytext", "ax" start_of_setup: #ifdef SAFE_RESET_DISK_CONTROLLER # Reset the disk controller. diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 58f0415d3ae0..140172b895bd 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -61,11 +62,10 @@ static void copy_boot_params(void) */ static void keyboard_set_repeat(void) { - u16 ax = 0x0305; - u16 bx = 0; - asm volatile("int $0x16" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); + struct biosregs ireg; + initregs(&ireg); + ireg.ax = 0x0305; + intcall(0x16, &ireg, NULL); } /* @@ -73,18 +73,22 @@ static void keyboard_set_repeat(void) */ static void query_ist(void) { + struct biosregs ireg, oreg; + /* Some older BIOSes apparently crash on this call, so filter it from machines too old to have SpeedStep at all. */ if (cpu.level < 6) return; - asm("int $0x15" - : "=a" (boot_params.ist_info.signature), - "=b" (boot_params.ist_info.command), - "=c" (boot_params.ist_info.event), - "=d" (boot_params.ist_info.perf_level) - : "a" (0x0000e980), /* IST Support */ - "d" (0x47534943)); /* Request value */ + initregs(&ireg); + ireg.ax = 0xe980; /* IST Support */ + ireg.edx = 0x47534943; /* Request value */ + intcall(0x15, &ireg, &oreg); + + boot_params.ist_info.signature = oreg.eax; + boot_params.ist_info.command = oreg.ebx; + boot_params.ist_info.event = oreg.ecx; + boot_params.ist_info.perf_level = oreg.edx; } /* @@ -93,13 +97,12 @@ static void query_ist(void) static void set_bios_mode(void) { #ifdef CONFIG_X86_64 - u32 eax, ebx; + struct biosregs ireg; - eax = 0xec00; - ebx = 2; - asm volatile("int $0x15" - : "+a" (eax), "+b" (ebx) - : : "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.ax = 0xec00; + ireg.bx = 2; + intcall(0x15, &ireg, NULL); #endif } diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 911eaae5d696..a95a531148ef 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -16,26 +17,22 @@ int query_mca(void) { - u8 err; - u16 es, bx, len; - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=acd" (err), "=acdSD" (es), "=b" (bx) - : "a" (0xc000)); - - if (err) + struct biosregs ireg, oreg; + u16 len; + + initregs(&ireg); + ireg.ah = 0xc0; + intcall(0x15, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* No MCA present */ - set_fs(es); - len = rdfs16(bx); + set_fs(oreg.es); + len = rdfs16(oreg.bx); if (len > sizeof(boot_params.sys_desc_table)) len = sizeof(boot_params.sys_desc_table); - copy_from_fs(&boot_params.sys_desc_table, bx, len); + copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len); return 0; } diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 5054c2ddd1a0..cae3feb1035e 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -17,43 +17,41 @@ #define SMAP 0x534d4150 /* ASCII "SMAP" */ -struct e820_ext_entry { - struct e820entry std; - u32 ext_flags; -} __attribute__((packed)); - static int detect_memory_e820(void) { int count = 0; - u32 next = 0; - u32 size, id, edi; - u8 err; + struct biosregs ireg, oreg; struct e820entry *desc = boot_params.e820_map; - static struct e820_ext_entry buf; /* static so it is zeroed */ + static struct e820entry buf; /* static so it is zeroed */ + + initregs(&ireg); + ireg.ax = 0xe820; + ireg.cx = sizeof buf; + ireg.edx = SMAP; + ireg.di = (size_t)&buf; /* - * Set this here so that if the BIOS doesn't change this field - * but still doesn't change %ecx, we're still okay... + * Note: at least one BIOS is known which assumes that the + * buffer pointed to by one e820 call is the same one as + * the previous call, and only changes modified fields. Therefore, + * we use a temporary buffer and copy the results entry by entry. + * + * This routine deliberately does not try to account for + * ACPI 3+ extended attributes. This is because there are + * BIOSes in the field which report zero for the valid bit for + * all ranges, and we don't currently make any use of the + * other attribute bits. Revisit this if we see the extended + * attribute bits deployed in a meaningful way in the future. */ - buf.ext_flags = 1; do { - size = sizeof buf; - - /* Important: %edx and %esi are clobbered by some BIOSes, - so they must be either used for the error output - or explicitly marked clobbered. Given that, assume there - is something out there clobbering %ebp and %edi, too. */ - asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0" - : "=d" (err), "+b" (next), "=a" (id), "+c" (size), - "=D" (edi), "+m" (buf) - : "D" (&buf), "d" (SMAP), "a" (0xe820) - : "esi"); + intcall(0x15, &ireg, &oreg); + ireg.ebx = oreg.ebx; /* for next iteration... */ /* BIOSes which terminate the chain with CF = 1 as opposed to %ebx = 0 don't always report the SMAP signature on the final, failing, probe. */ - if (err) + if (oreg.eflags & X86_EFLAGS_CF) break; /* Some BIOSes stop returning SMAP in the middle of @@ -61,66 +59,64 @@ static int detect_memory_e820(void) screwed up the map at that point, we might have a partial map, the full map, or complete garbage, so just return failure. */ - if (id != SMAP) { + if (oreg.eax != SMAP) { count = 0; break; } - /* ACPI 3.0 added the extended flags support. If bit 0 - in the extended flags is zero, we're supposed to simply - ignore the entry -- a backwards incompatible change! */ - if (size > 20 && !(buf.ext_flags & 1)) - continue; - - *desc++ = buf.std; + *desc++ = buf; count++; - } while (next && count < ARRAY_SIZE(boot_params.e820_map)); + } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } static int detect_memory_e801(void) { - u16 ax, bx, cx, dx; - u8 err; + struct biosregs ireg, oreg; - bx = cx = dx = 0; - ax = 0xe801; - asm("stc; int $0x15; setc %0" - : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); + initregs(&ireg); + ireg.ax = 0xe801; + intcall(0x15, &ireg, &oreg); - if (err) + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* Do we really need to do this? */ - if (cx || dx) { - ax = cx; - bx = dx; + if (oreg.cx || oreg.dx) { + oreg.ax = oreg.cx; + oreg.bx = oreg.dx; } - if (ax > 15*1024) + if (oreg.ax > 15*1024) { return -1; /* Bogus! */ - - /* This ignores memory above 16MB if we have a memory hole - there. If someone actually finds a machine with a memory - hole at 16MB and no support for 0E820h they should probably - generate a fake e820 map. */ - boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax; + } else if (oreg.ax == 15*1024) { + boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax; + } else { + /* + * This ignores memory above 16MB if we have a memory + * hole there. If someone actually finds a machine + * with a memory hole at 16MB and no support for + * 0E820h they should probably generate a fake e820 + * map. + */ + boot_params.alt_mem_k = oreg.ax; + } return 0; } static int detect_memory_88(void) { - u16 ax; - u8 err; + struct biosregs ireg, oreg; - ax = 0x8800; - asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); + initregs(&ireg); + ireg.ah = 0x88; + intcall(0x15, &ireg, &oreg); - boot_params.screen_info.ext_mem_k = ax; + boot_params.screen_info.ext_mem_k = oreg.ax; - return -err; + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ } int detect_memory(void) diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c new file mode 100644 index 000000000000..958019b1cfa5 --- /dev/null +++ b/arch/x86/boot/regs.c @@ -0,0 +1,29 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * Simple helper function for initializing a register set. + * + * Note that this sets EFLAGS_CF in the input register set; this + * makes it easier to catch functions which do nothing but don't + * explicitly set CF. + */ + +#include "boot.h" + +void initregs(struct biosregs *reg) +{ + memset(reg, 0, sizeof *reg); + reg->eflags |= X86_EFLAGS_CF; + reg->ds = ds(); + reg->es = ds(); + reg->fs = fs(); + reg->gs = gs(); +} diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index bb8dc2de7969..0f6ec455a2b1 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -15,8 +15,11 @@ SECTIONS . = 497; .header : { *(.header) } + .entrytext : { *(.entrytext) } .inittext : { *(.inittext) } .initdata : { *(.initdata) } + __end_init = .; + .text : { *(.text) } .text32 : { *(.text32) } @@ -52,4 +55,7 @@ SECTIONS . = ASSERT(_end <= 0x8000, "Setup too big!"); . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + /* Necessary for the very-old-loader check to work... */ + . = ASSERT(__end_init <= 5*512, "init sections too big!"); + } diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 7e8e8b25f5f6..01ec69c901c7 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -22,24 +23,23 @@ void __attribute__((section(".inittext"))) putchar(int ch) { - unsigned char c = ch; + struct biosregs ireg; - if (c == '\n') + if (ch == '\n') putchar('\r'); /* \n -> \r\n */ - /* int $0x10 is known to have bugs involving touching registers - it shouldn't. Be extra conservative... */ - asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal" - : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch)); + initregs(&ireg); + ireg.bx = 0x0007; + ireg.cx = 0x0001; + ireg.ah = 0x0e; + ireg.al = ch; + intcall(0x10, &ireg, NULL); } void __attribute__((section(".inittext"))) puts(const char *str) { - int n = 0; - while (*str) { + while (*str) putchar(*str++); - n++; - } } /* @@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str) static u8 gettime(void) { - u16 ax = 0x0200; - u16 cx, dx; + struct biosregs ireg, oreg; - asm volatile("int $0x1a" - : "+a" (ax), "=c" (cx), "=d" (dx) - : : "ebx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x02; + intcall(0x1a, &ireg, &oreg); - return dx >> 8; + return oreg.dh; } /* @@ -64,19 +63,24 @@ static u8 gettime(void) */ int getchar(void) { - u16 ax = 0; - asm volatile("int $0x16" : "+a" (ax)); + struct biosregs ireg, oreg; + + initregs(&ireg); + /* ireg.ah = 0x00; */ + intcall(0x16, &ireg, &oreg); - return ax & 0xff; + return oreg.al; } static int kbd_pending(void) { - u8 pending; - asm volatile("int $0x16; setnz %0" - : "=qm" (pending) - : "a" (0x0100)); - return pending; + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x01; + intcall(0x16, &ireg, &oreg); + + return !(oreg.eflags & X86_EFLAGS_ZF); } void kbd_flush(void) diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 3fa979c9c363..d660be492363 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi) static int set_bios_mode(u8 mode) { - u16 ax; + struct biosregs ireg, oreg; u8 new_mode; - ax = mode; /* AH=0x00 Set Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.al = mode; /* AH=0x00 Set Video Mode */ + intcall(0x10, &ireg, NULL); - ax = 0x0f00; /* Get Current Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + + ireg.ah = 0x0f; /* Get Current Video Mode */ + intcall(0x10, &ireg, &oreg); do_restore = 1; /* Assume video contents were lost */ - new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */ + + /* Not all BIOSes are clean with the top bit */ + new_mode = ireg.al & 0x7f; if (new_mode == mode) return 0; /* Mode change OK */ @@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode) /* Mode setting failed, but we didn't end up where we started. That's bad. Try to revert to the original video mode. */ - ax = boot_params.screen_info.orig_video_mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = boot_params.screen_info.orig_video_mode; + intcall(0x10, &ireg, NULL); } #endif return -1; diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 4a58c8ce3f69..c700147d6ffb 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {} static int vesa_probe(void) { #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) - u16 ax, cx, di; + struct biosregs ireg, oreg; u16 mode; addr_t mode_ptr; struct mode_info *mi; @@ -39,13 +40,12 @@ static int vesa_probe(void) video_vesa.modes = GET_HEAP(struct mode_info, 0); - ax = 0x4f00; - di = (size_t)&vginfo; - asm(INT10 - : "+a" (ax), "+D" (di), "=m" (vginfo) - : : "ebx", "ecx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f00; + ireg.di = (size_t)&vginfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f || + if (ireg.ax != 0x004f || vginfo.signature != VESA_MAGIC || vginfo.version < 0x0102) return 0; /* Not present */ @@ -65,14 +65,12 @@ static int vesa_probe(void) memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - ax = 0x4f01; - cx = mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); + ireg.ax = 0x4f01; + ireg.cx = mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (ireg.ax != 0x004f) continue; if ((vminfo.mode_attr & 0x15) == 0x05) { @@ -111,20 +109,19 @@ static int vesa_probe(void) static int vesa_set_mode(struct mode_info *mode) { - u16 ax, bx, cx, di; + struct biosregs ireg, oreg; int is_graphic; u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - ax = 0x4f01; - cx = vesa_mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f01; + ireg.cx = vesa_mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return -1; if ((vminfo.mode_attr & 0x15) == 0x05) { @@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode) } - ax = 0x4f02; - bx = vesa_mode; - di = 0; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f02; + ireg.bx = vesa_mode; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return -1; graphic_mode = is_graphic; @@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode) /* Switch DAC to 8-bit mode */ static void vesa_dac_set_8bits(void) { + struct biosregs ireg, oreg; u8 dac_size = 6; /* If possible, switch the DAC to 8-bit mode */ if (vginfo.capabilities & 1) { - u16 ax, bx; - - ax = 0x4f08; - bx = 0x0800; - asm volatile(INT10 - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - if (ax == 0x004f) - dac_size = bx >> 8; + initregs(&ireg); + ireg.ax = 0x4f08; + ireg.bh = 0x08; + intcall(0x10, &ireg, &oreg); + if (oreg.ax == 0x004f) + dac_size = oreg.bh; } /* Set the color sizes to the DAC size, and offsets to 0 */ - boot_params.screen_info.red_size = dac_size; + boot_params.screen_info.red_size = dac_size; boot_params.screen_info.green_size = dac_size; - boot_params.screen_info.blue_size = dac_size; - boot_params.screen_info.rsvd_size = dac_size; + boot_params.screen_info.blue_size = dac_size; + boot_params.screen_info.rsvd_size = dac_size; - boot_params.screen_info.red_pos = 0; - boot_params.screen_info.green_pos = 0; - boot_params.screen_info.blue_pos = 0; - boot_params.screen_info.rsvd_pos = 0; + boot_params.screen_info.red_pos = 0; + boot_params.screen_info.green_pos = 0; + boot_params.screen_info.blue_pos = 0; + boot_params.screen_info.rsvd_pos = 0; } /* Save the VESA protected mode info */ static void vesa_store_pm_info(void) { - u16 ax, bx, di, es; + struct biosregs ireg, oreg; - ax = 0x4f0a; - bx = di = 0; - asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" - : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f0a; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return; - boot_params.screen_info.vesapm_seg = es; - boot_params.screen_info.vesapm_off = di; + boot_params.screen_info.vesapm_seg = oreg.es; + boot_params.screen_info.vesapm_off = oreg.di; } /* @@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void) void vesa_store_edid(void) { #ifdef CONFIG_FIRMWARE_EDID - u16 ax, bx, cx, dx, di; + struct biosregs ireg, oreg; /* Apparently used as a nonsense token... */ memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); @@ -260,33 +250,26 @@ void vesa_store_edid(void) if (vginfo.version < 0x0200) return; /* EDID requires VBE 2.0+ */ - ax = 0x4f15; /* VBE DDC */ - bx = 0x0000; /* Report DDC capabilities */ - cx = 0; /* Controller 0 */ - di = 0; /* ES:DI must be 0 by spec */ - - /* Note: The VBE DDC spec is different from the main VESA spec; - we genuinely have to assume all registers are destroyed here. */ - - asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) - : : "esi", "edx"); + initregs(&ireg); + ireg.ax = 0x4f15; /* VBE DDC */ + /* ireg.bx = 0x0000; */ /* Report DDC capabilities */ + /* ireg.cx = 0; */ /* Controller 0 */ + ireg.es = 0; /* ES:DI must be 0 by spec */ + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return; /* No EDID */ /* BH = time in seconds to transfer EDD information */ /* BL = DDC level supported */ - ax = 0x4f15; /* VBE DDC */ - bx = 0x0001; /* Read EDID */ - cx = 0; /* Controller 0 */ - dx = 0; /* EDID block number */ - di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ - asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), - "+c" (cx), "+D" (di) - : : "esi"); + ireg.ax = 0x4f15; /* VBE DDC */ + ireg.bx = 0x0001; /* Read EDID */ + /* ireg.cx = 0; */ /* Controller 0 */ + /* ireg.dx = 0; */ /* EDID block number */ + ireg.es = ds(); + ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */ + intcall(0x10, &ireg, &oreg); #endif /* CONFIG_FIRMWARE_EDID */ } diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 9e0587a37768..8f8d827e254d 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -39,30 +40,30 @@ static __videocard video_vga; /* Set basic 80x25 mode */ static u8 vga_set_basic_mode(void) { + struct biosregs ireg, oreg; u16 ax; u8 rows; u8 mode; + initregs(&ireg); + #ifdef CONFIG_VIDEO_400_HACK if (adapter >= ADAPTER_VGA) { - asm volatile(INT10 - : : "a" (0x1202), "b" (0x0030) - : "ecx", "edx", "esi", "edi"); + ireg.ax = 0x1202; + ireg.bx = 0x0030; + intcall(0x10, &ireg, NULL); } #endif ax = 0x0f00; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - - mode = (u8)ax; + intcall(0x10, &ireg, &oreg); + mode = oreg.al; set_fs(0); rows = rdfs8(0x484); /* rows minus one */ #ifndef CONFIG_VIDEO_400_HACK - if ((ax == 0x5003 || ax == 0x5007) && + if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && (rows == 0 || rows == 24)) return mode; #endif @@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void) mode = 3; /* Set the mode */ - ax = mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = mode; /* AH=0: set mode */ + intcall(0x10, &ireg, NULL); do_restore = 1; return mode; } @@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void) static void vga_set_8font(void) { /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ + struct biosregs ireg; + + initregs(&ireg); /* Set 8x8 font */ - asm volatile(INT10 : : "a" (0x1112), "b" (0)); + ireg.ax = 0x1112; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); /* Use alternate print screen */ - asm volatile(INT10 : : "a" (0x1200), "b" (0x20)); + ireg.ax = 0x1200; + ireg.bl = 0x20; + intcall(0x10, &ireg, NULL); /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); /* Cursor is scan lines 6-7 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0607)); + ireg.ax = 0x0100; + ireg.cx = 0x0607; + intcall(0x10, &ireg, NULL); } static void vga_set_14font(void) { /* Set 9x14 font - 80x28 on VGA */ + struct biosregs ireg; + + initregs(&ireg); /* Set 9x14 font */ - asm volatile(INT10 : : "a" (0x1111), "b" (0)); + ireg.ax = 0x1111; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); /* Cursor is scan lines 11-12 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c)); + ireg.ax = 0x0100; + ireg.cx = 0x0b0c; + intcall(0x10, &ireg, NULL); } static void vga_set_80x43(void) { /* Set 80x43 mode on VGA (not EGA) */ + struct biosregs ireg; + + initregs(&ireg); /* Set 350 scans */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x30)); + ireg.ax = 0x1201; + ireg.bl = 0x30; + intcall(0x10, &ireg, NULL); /* Reset video mode */ - asm volatile(INT10 : : "a" (0x0003)); + ireg.ax = 0x0003; + intcall(0x10, &ireg, NULL); vga_set_8font(); } @@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode) */ static int vga_probe(void) { - u16 ega_bx; - static const char *card_name[] = { "CGA/MDA/HGC", "EGA", "VGA" }; @@ -240,26 +263,26 @@ static int vga_probe(void) sizeof(ega_modes)/sizeof(struct mode_info), sizeof(vga_modes)/sizeof(struct mode_info), }; - u8 vga_flag; - asm(INT10 - : "=b" (ega_bx) - : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ - : "ecx", "edx", "esi", "edi"); + struct biosregs ireg, oreg; + + initregs(&ireg); + + ireg.ax = 0x1200; + ireg.bl = 0x10; /* Check EGA/VGA */ + intcall(0x10, &ireg, &oreg); #ifndef _WAKEUP - boot_params.screen_info.orig_video_ega_bx = ega_bx; + boot_params.screen_info.orig_video_ega_bx = oreg.bx; #endif /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ - if ((u8)ega_bx != 0x10) { + if (oreg.bl != 0x10) { /* EGA/VGA */ - asm(INT10 - : "=a" (vga_flag) - : "a" (0x1a00) - : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = 0x1a00; + intcall(0x10, &ireg, &oreg); - if (vga_flag == 0x1a) { + if (oreg.al == 0x1a) { adapter = ADAPTER_VGA; #ifndef _WAKEUP boot_params.screen_info.orig_video_isVGA = 1; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 3bef2c1febe9..bad728b76fc2 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -18,33 +19,29 @@ static void store_cursor_position(void) { - u16 curpos; - u16 ax, bx; + struct biosregs ireg, oreg; - ax = 0x0300; - bx = 0; - asm(INT10 - : "=d" (curpos), "+a" (ax), "+b" (bx) - : : "ecx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x03; + intcall(0x10, &ireg, &oreg); - boot_params.screen_info.orig_x = curpos; - boot_params.screen_info.orig_y = curpos >> 8; + boot_params.screen_info.orig_x = oreg.dl; + boot_params.screen_info.orig_y = oreg.dh; } static void store_video_mode(void) { - u16 ax, page; + struct biosregs ireg, oreg; /* N.B.: the saving of the video page here is a bit silly, since we pretty much assume page 0 everywhere. */ - ax = 0x0f00; - asm(INT10 - : "+a" (ax), "=b" (page) - : : "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x0f; + intcall(0x10, &ireg, &oreg); /* Not all BIOSes are clean with respect to the top bit */ - boot_params.screen_info.orig_video_mode = ax & 0x7f; - boot_params.screen_info.orig_video_page = page >> 8; + boot_params.screen_info.orig_video_mode = oreg.al & 0x7f; + boot_params.screen_info.orig_video_page = oreg.bh; } /* @@ -257,7 +254,7 @@ static void restore_screen(void) int y; addr_t dst = 0; u16 *src = saved.data; - u16 ax, bx, dx; + struct biosregs ireg; if (graphic_mode) return; /* Can't restore onto a graphic mode */ @@ -296,12 +293,11 @@ static void restore_screen(void) } /* Restore cursor position */ - ax = 0x0200; /* Set cursor position */ - bx = 0; /* Page number (<< 8) */ - dx = (saved.cury << 8)+saved.curx; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx) - : : "ecx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x02; /* Set cursor position */ + ireg.dh = saved.cury; + ireg.dl = saved.curx; + intcall(0x10, &ireg, NULL); } #else #define save_screen() ((void)0) diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index ee63f5d14461..5bb174a997fc 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -112,20 +112,6 @@ extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ extern int do_restore; /* Restore screen contents */ extern int graphic_mode; /* Graphics mode with linear frame buffer */ -/* - * int $0x10 is notorious for touching registers it shouldn't. - * gcc doesn't like %ebp being clobbered, so define it as a push/pop - * sequence here. - * - * A number of systems, including the original PC can clobber %bp in - * certain circumstances, like when scrolling. There exists at least - * one Trident video card which could clobber DS under a set of - * circumstances that we are unlikely to encounter (scrolling when - * using an extended graphics mode of more than 800x600 pixels), but - * it's cheap insurance to deal with that here. - */ -#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp" - /* Accessing VGA indexed registers */ static inline u8 in_idx(u16 port, u8 index) { diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 235b81d0f6f2..edb992ebef92 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,12 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc4 -# Tue Feb 24 15:50:58 2009 +# Linux kernel version: 2.6.30-rc2 +# Mon May 11 16:21:55 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y # CONFIG_X86_64 is not set CONFIG_X86=y +CONFIG_OUTPUT_FORMAT="elf32-i386" CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y @@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y @@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_POPULATES_NODE_MAP=y # CONFIG_AUDIT_ARCH is not set CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_X86_SMP=y CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_32_SMP=y CONFIG_X86_HT=y -CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y +CONFIG_X86_32_LAZY_GS=y CONFIG_KTIME_SCALAR=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_BZIP2=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_KERNEL_GZIP=y +# CONFIG_KERNEL_BZIP2 is not set +# CONFIG_KERNEL_LZMA is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set CONFIG_TASKSTATS=y @@ -113,23 +123,26 @@ CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_PCSPKR_PLATFORM=y -# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y @@ -139,6 +152,7 @@ CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y +# CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set @@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_LBD is not set -CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set @@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_SPARSE_IRQ=y -CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +# CONFIG_X86_BIGSMP is not set +CONFIG_X86_EXTENDED_PLATFORM=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_GENERICARCH is not set -# CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set +# CONFIG_X86_32_NON_STANDARD is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set @@ -230,8 +245,10 @@ CONFIG_M686=y # CONFIG_GENERIC_CPU is not set CONFIG_X86_GENERIC=y CONFIG_X86_CPU=y +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=5 CONFIG_X86_XADD=y # CONFIG_X86_PPRO_FENCE is not set CONFIG_X86_WP_WORKS_OK=y @@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y CONFIG_CPU_SUP_INTEL=y CONFIG_CPU_SUP_CYRIX_32=y CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_CENTAUR_32=y +CONFIG_CPU_SUP_CENTAUR=y CONFIG_CPU_SUP_TRANSMETA_32=y CONFIG_CPU_SUP_UMC_32=y CONFIG_X86_DS=y @@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +# CONFIG_X86_CPU_DEBUG is not set # CONFIG_NOHIGHMEM is not set CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set @@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y CONFIG_HIGHPTE=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y @@ -312,6 +332,7 @@ CONFIG_MTRR=y CONFIG_X86_PAT=y CONFIG_EFI=y CONFIG_SECCOMP=y +# CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set @@ -322,8 +343,9 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y # CONFIG_KEXEC_JUMP is not set CONFIG_PHYSICAL_START=0x1000000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_RELOCATABLE=y +CONFIG_X86_NEED_RELOCS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set # CONFIG_CMDLINE_BOOL is not set @@ -363,7 +385,6 @@ CONFIG_ACPI_THERMAL=y CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_SBS is not set @@ -425,6 +446,7 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_DOMAINS=y +# CONFIG_DMAR is not set CONFIG_PCIEPORTBUS=y # CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y @@ -435,6 +457,7 @@ CONFIG_PCI_MSI=y # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y +# CONFIG_PCI_IOV is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set @@ -481,7 +504,6 @@ CONFIG_NET=y # # Networking options # -CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -639,6 +661,7 @@ CONFIG_LLC=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set CONFIG_NET_SCHED=y # @@ -696,6 +719,7 @@ CONFIG_NET_SCH_FIFO=y # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set +# CONFIG_NET_DROP_MONITOR is not set CONFIG_HAMRADIO=y # @@ -706,12 +730,10 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_FIB_RULES=y CONFIG_WIRELESS=y CONFIG_CFG80211=y # CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_NL80211=y CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y @@ -789,6 +811,7 @@ CONFIG_MISC_DEVICES=y # CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_ISL29003 is not set # CONFIG_C2PORT is not set # @@ -842,6 +865,7 @@ CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y @@ -940,6 +964,7 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y # CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -977,6 +1002,8 @@ CONFIG_MII=y CONFIG_NET_VENDOR_3COM=y # CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set # CONFIG_TULIP is not set @@ -1026,6 +1053,7 @@ CONFIG_E1000=y CONFIG_E1000E=y # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -1040,6 +1068,7 @@ CONFIG_BNX2=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set @@ -1049,6 +1078,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -1058,6 +1088,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set +# CONFIG_BE2NET is not set CONFIG_TR=y # CONFIG_IBMOL is not set # CONFIG_IBMLS is not set @@ -1073,8 +1104,8 @@ CONFIG_WLAN_80211=y # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set -# CONFIG_HERMES is not set # CONFIG_ATMEL is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_AIRO_CS is not set # CONFIG_PCMCIA_WL3501 is not set # CONFIG_PRISM54 is not set @@ -1084,21 +1115,21 @@ CONFIG_WLAN_80211=y # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set # CONFIG_MAC80211_HWSIM is not set +# CONFIG_MWL8K is not set # CONFIG_P54_COMMON is not set CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_AR9170_USB is not set # CONFIG_IPW2100 is not set # CONFIG_IPW2200 is not set -# CONFIG_IWLCORE is not set -# CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWLAGN is not set -# CONFIG_IWL3945 is not set +# CONFIG_IWLWIFI is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set # CONFIG_B43LEGACY is not set # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# CONFIG_HERMES is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers @@ -1209,6 +1240,8 @@ CONFIG_INPUT_TABLET=y # CONFIG_TABLET_USB_KBTAB is not set # CONFIG_TABLET_USB_WACOM is not set CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_AD7879_I2C is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set @@ -1303,6 +1336,7 @@ CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_TIMERIOMEM is not set CONFIG_HW_RANDOM_INTEL=y CONFIG_HW_RANDOM_AMD=y CONFIG_HW_RANDOM_GEODE=y @@ -1390,7 +1424,6 @@ CONFIG_I2C_I801=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set @@ -1424,6 +1457,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_ADT7475 is not set # CONFIG_SENSORS_K8TEMP is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATK0110 is not set # CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_I5K_AMB is not set @@ -1433,6 +1467,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_CORETEMP is not set @@ -1448,11 +1483,14 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LM90 is not set # CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4215 is not set # CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LM95241 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_MAX6650 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_DME1737 is not set # CONFIG_SENSORS_SMSC47M1 is not set @@ -1643,7 +1681,6 @@ CONFIG_FB_EFI=y # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set -# CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set # CONFIG_FB_ARK is not set # CONFIG_FB_PM3 is not set @@ -1652,6 +1689,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y @@ -1738,6 +1776,8 @@ CONFIG_SND_PCI=y # CONFIG_SND_INDIGO is not set # CONFIG_SND_INDIGOIO is not set # CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set # CONFIG_SND_EMU10K1 is not set # CONFIG_SND_EMU10K1X is not set # CONFIG_SND_ENS1370 is not set @@ -1811,15 +1851,17 @@ CONFIG_USB_HIDDEV=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +CONFIG_HID_KYE=y CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1885,11 +1927,11 @@ CONFIG_USB_PRINTER=y # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set @@ -1931,7 +1973,6 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set @@ -1947,6 +1988,7 @@ CONFIG_USB_LIBUSUAL=y # # OTG and related infrastructure # +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1958,8 +2000,10 @@ CONFIG_LEDS_CLASS=y # # CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_LP5521 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_BD2802 is not set # # LED Triggers @@ -1969,6 +2013,10 @@ CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set + +# +# iptables trigger is under Netfilter config (LED target) +# # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC=y @@ -2037,6 +2085,7 @@ CONFIG_DMADEVICES=y # DMA Devices # # CONFIG_INTEL_IOATDMA is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set CONFIG_X86_PLATFORM_DEVICES=y @@ -2071,6 +2120,7 @@ CONFIG_DMIID=y # # CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y @@ -2101,6 +2151,11 @@ CONFIG_AUTOFS4_FS=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y @@ -2151,6 +2206,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -2164,7 +2220,6 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2251,6 +2306,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y @@ -2266,6 +2322,7 @@ CONFIG_TIMER_STATS=y # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y @@ -2289,13 +2346,19 @@ CONFIG_FRAME_POINTER=y # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_DEBUG_PAGEALLOC is not set CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y +CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -2305,13 +2368,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_FTRACE_SYSCALLS is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_POWER_TRACER is not set # CONFIG_STACK_TRACER is not set # CONFIG_HW_BRANCH_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_MMIOTRACE is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -2321,7 +2392,6 @@ CONFIG_EARLY_PRINTK=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y -# CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set CONFIG_DEBUG_RODATA=y @@ -2329,7 +2399,7 @@ CONFIG_DEBUG_RODATA=y CONFIG_DEBUG_NX_TEST=m # CONFIG_4KSTACKS is not set CONFIG_DOUBLEFAULT=y -# CONFIG_MMIOTRACE is not set +CONFIG_HAVE_MMIOTRACE_SUPPORT=y CONFIG_IO_DELAY_TYPE_0X80=0 CONFIG_IO_DELAY_TYPE_0XED=1 CONFIG_IO_DELAY_TYPE_UDELAY=2 @@ -2365,6 +2435,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_IMA is not set CONFIG_CRYPTO=y # @@ -2380,10 +2452,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y # CONFIG_CRYPTO_TEST is not set @@ -2456,6 +2530,7 @@ CONFIG_CRYPTO_DES=y # Compression # # CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set # @@ -2467,11 +2542,13 @@ CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_GEODE is not set # CONFIG_CRYPTO_DEV_HIFN_795X is not set CONFIG_HAVE_KVM=y +CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_VIRTUALIZATION=y # CONFIG_KVM is not set # CONFIG_LGUEST is not set # CONFIG_VIRTIO_PCI is not set # CONFIG_VIRTIO_BALLOON is not set +CONFIG_BINARY_PRINTF=y # # Library routines @@ -2489,7 +2566,10 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_AUDIT_GENERIC=y CONFIG_ZLIB_INFLATE=y -CONFIG_PLIST=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9fe5d212ab4c..cee1dd2e69b2 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,12 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc4 -# Tue Feb 24 15:44:16 2009 +# Linux kernel version: 2.6.30-rc2 +# Mon May 11 16:22:00 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set CONFIG_X86_64=y CONFIG_X86=y +CONFIG_OUTPUT_FORMAT="elf64-x86-64" CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y @@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y @@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_AUDIT_ARCH=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_X86_SMP=y CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_64_SMP=y CONFIG_X86_HT=y -CONFIG_X86_BIOS_REBOOT=y CONFIG_X86_TRAMPOLINE=y # CONFIG_KTIME_SCALAR is not set CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_BZIP2=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_KERNEL_GZIP=y +# CONFIG_KERNEL_BZIP2 is not set +# CONFIG_KERNEL_LZMA is not set CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set CONFIG_TASKSTATS=y @@ -114,23 +123,26 @@ CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_PCSPKR_PLATFORM=y -# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y @@ -140,6 +152,7 @@ CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y +# CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set @@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y @@ -195,12 +209,10 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_SPARSE_IRQ=y -# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set -CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y -# CONFIG_X86_ELAN is not set -# CONFIG_X86_GENERICARCH is not set +CONFIG_X86_EXTENDED_PLATFORM=y # CONFIG_X86_VSMP is not set +# CONFIG_X86_UV is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set @@ -230,10 +242,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_MCORE2 is not set CONFIG_GENERIC_CPU=y CONFIG_X86_CPU=y -CONFIG_X86_L1_CACHE_BYTES=128 -CONFIG_X86_INTERNODE_CACHE_BYTES=128 +CONFIG_X86_L1_CACHE_BYTES=64 +CONFIG_X86_INTERNODE_CACHE_BYTES=64 CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_TSC=y CONFIG_X86_CMPXCHG64=y @@ -242,7 +254,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y CONFIG_CPU_SUP_INTEL=y CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_CENTAUR_64=y +CONFIG_CPU_SUP_CENTAUR=y CONFIG_X86_DS=y CONFIG_X86_PTRACE_BTS=y CONFIG_HPET_TIMER=y @@ -269,6 +281,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y CONFIG_X86_MCE_AMD=y +CONFIG_X86_MCE_THRESHOLD=y # CONFIG_I8K is not set CONFIG_MICROCODE=y CONFIG_MICROCODE_INTEL=y @@ -276,6 +289,7 @@ CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +# CONFIG_X86_CPU_DEBUG is not set CONFIG_ARCH_PHYS_ADDR_T_64BIT=y CONFIG_DIRECT_GBPAGES=y CONFIG_NUMA=y @@ -309,6 +323,8 @@ CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y CONFIG_X86_RESERVE_LOW_64K=y @@ -317,6 +333,7 @@ CONFIG_MTRR=y CONFIG_X86_PAT=y CONFIG_EFI=y CONFIG_SECCOMP=y +# CONFIG_CC_STACKPROTECTOR is not set # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set @@ -325,9 +342,10 @@ CONFIG_HZ=1000 CONFIG_SCHED_HRTICK=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +# CONFIG_KEXEC_JUMP is not set CONFIG_PHYSICAL_START=0x1000000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_RELOCATABLE=y +CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set # CONFIG_CMDLINE_BOOL is not set @@ -370,7 +388,6 @@ CONFIG_ACPI_NUMA=y CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_SBS is not set @@ -436,6 +453,7 @@ CONFIG_PCI_MSI=y # CONFIG_PCI_DEBUG is not set # CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y +# CONFIG_PCI_IOV is not set CONFIG_ISA_DMA_API=y CONFIG_K8_NB=y CONFIG_PCCARD=y @@ -481,7 +499,6 @@ CONFIG_NET=y # # Networking options # -CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -639,6 +656,7 @@ CONFIG_LLC=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set CONFIG_NET_SCHED=y # @@ -696,6 +714,7 @@ CONFIG_NET_SCH_FIFO=y # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set +# CONFIG_NET_DROP_MONITOR is not set CONFIG_HAMRADIO=y # @@ -706,12 +725,10 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_FIB_RULES=y CONFIG_WIRELESS=y CONFIG_CFG80211=y # CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_NL80211=y CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y @@ -788,9 +805,8 @@ CONFIG_MISC_DEVICES=y # CONFIG_TIFM_CORE is not set # CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set -# CONFIG_SGI_XP is not set # CONFIG_HP_ILO is not set -# CONFIG_SGI_GRU is not set +# CONFIG_ISL29003 is not set # CONFIG_C2PORT is not set # @@ -844,6 +860,7 @@ CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y @@ -940,6 +957,7 @@ CONFIG_DM_ZERO=y CONFIG_MACINTOSH_DRIVERS=y CONFIG_MAC_EMUMOUSEBTN=y CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y # CONFIG_IFB is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set @@ -977,6 +995,8 @@ CONFIG_MII=y CONFIG_NET_VENDOR_3COM=y # CONFIG_VORTEX is not set # CONFIG_TYPHOON is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set CONFIG_NET_TULIP=y # CONFIG_DE2104X is not set # CONFIG_TULIP is not set @@ -1026,6 +1046,7 @@ CONFIG_E1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -1040,6 +1061,7 @@ CONFIG_TIGON3=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set @@ -1049,6 +1071,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -1058,6 +1081,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set +# CONFIG_BE2NET is not set CONFIG_TR=y # CONFIG_IBMOL is not set # CONFIG_3C359 is not set @@ -1072,8 +1096,8 @@ CONFIG_WLAN_80211=y # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set -# CONFIG_HERMES is not set # CONFIG_ATMEL is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_AIRO_CS is not set # CONFIG_PCMCIA_WL3501 is not set # CONFIG_PRISM54 is not set @@ -1083,21 +1107,21 @@ CONFIG_WLAN_80211=y # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set # CONFIG_MAC80211_HWSIM is not set +# CONFIG_MWL8K is not set # CONFIG_P54_COMMON is not set CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_AR9170_USB is not set # CONFIG_IPW2100 is not set # CONFIG_IPW2200 is not set -# CONFIG_IWLCORE is not set -# CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWLAGN is not set -# CONFIG_IWL3945 is not set +# CONFIG_IWLWIFI is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set # CONFIG_B43LEGACY is not set # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# CONFIG_HERMES is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers @@ -1208,6 +1232,8 @@ CONFIG_INPUT_TABLET=y # CONFIG_TABLET_USB_KBTAB is not set # CONFIG_TABLET_USB_WACOM is not set CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_AD7879_I2C is not set +# CONFIG_TOUCHSCREEN_AD7879 is not set # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set @@ -1301,6 +1327,7 @@ CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_INTEL is not set # CONFIG_HW_RANDOM_AMD is not set CONFIG_NVRAM=y @@ -1382,7 +1409,6 @@ CONFIG_I2C_I801=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set @@ -1416,6 +1442,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_ADT7475 is not set # CONFIG_SENSORS_K8TEMP is not set # CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATK0110 is not set # CONFIG_SENSORS_ATXP1 is not set # CONFIG_SENSORS_DS1621 is not set # CONFIG_SENSORS_I5K_AMB is not set @@ -1425,6 +1452,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_FSCHER is not set # CONFIG_SENSORS_FSCPOS is not set # CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_CORETEMP is not set @@ -1440,11 +1468,14 @@ CONFIG_HWMON=y # CONFIG_SENSORS_LM90 is not set # CONFIG_SENSORS_LM92 is not set # CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4215 is not set # CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LM95241 is not set # CONFIG_SENSORS_MAX1619 is not set # CONFIG_SENSORS_MAX6650 is not set # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_DME1737 is not set # CONFIG_SENSORS_SMSC47M1 is not set @@ -1635,6 +1666,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y @@ -1720,6 +1752,8 @@ CONFIG_SND_PCI=y # CONFIG_SND_INDIGO is not set # CONFIG_SND_INDIGOIO is not set # CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set # CONFIG_SND_EMU10K1 is not set # CONFIG_SND_EMU10K1X is not set # CONFIG_SND_ENS1370 is not set @@ -1792,15 +1826,17 @@ CONFIG_USB_HIDDEV=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +CONFIG_HID_KYE=y CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1866,11 +1902,11 @@ CONFIG_USB_PRINTER=y # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set @@ -1912,7 +1948,6 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set @@ -1928,6 +1963,7 @@ CONFIG_USB_LIBUSUAL=y # # OTG and related infrastructure # +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1939,8 +1975,10 @@ CONFIG_LEDS_CLASS=y # # CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_LP5521 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_BD2802 is not set # # LED Triggers @@ -1950,6 +1988,10 @@ CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set + +# +# iptables trigger is under Netfilter config (LED target) +# # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC=y @@ -2018,6 +2060,7 @@ CONFIG_DMADEVICES=y # DMA Devices # # CONFIG_INTEL_IOATDMA is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set CONFIG_X86_PLATFORM_DEVICES=y @@ -2051,6 +2094,7 @@ CONFIG_DMIID=y # # CONFIG_EXT2_FS is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y @@ -2082,6 +2126,11 @@ CONFIG_AUTOFS4_FS=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y @@ -2132,6 +2181,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -2145,7 +2195,6 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2232,6 +2281,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y @@ -2247,6 +2297,7 @@ CONFIG_TIMER_STATS=y # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set @@ -2269,13 +2320,19 @@ CONFIG_FRAME_POINTER=y # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_DEBUG_PAGEALLOC is not set CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_HW_BRANCH_TRACER=y +CONFIG_HAVE_FTRACE_SYSCALLS=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -2285,13 +2342,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_FTRACE_SYSCALLS is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_POWER_TRACER is not set # CONFIG_STACK_TRACER is not set # CONFIG_HW_BRANCH_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_MMIOTRACE is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -2301,14 +2366,13 @@ CONFIG_EARLY_PRINTK=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y -# CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set CONFIG_DEBUG_RODATA=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m # CONFIG_IOMMU_DEBUG is not set -# CONFIG_MMIOTRACE is not set +CONFIG_HAVE_MMIOTRACE_SUPPORT=y CONFIG_IO_DELAY_TYPE_0X80=0 CONFIG_IO_DELAY_TYPE_0XED=1 CONFIG_IO_DELAY_TYPE_UDELAY=2 @@ -2344,6 +2408,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_IMA is not set CONFIG_CRYPTO=y # @@ -2359,10 +2425,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y # CONFIG_CRYPTO_TEST is not set @@ -2414,6 +2482,7 @@ CONFIG_CRYPTO_SHA1=y # CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_AES_X86_64 is not set +# CONFIG_CRYPTO_AES_NI_INTEL is not set # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_ARC4=y # CONFIG_CRYPTO_BLOWFISH is not set @@ -2435,6 +2504,7 @@ CONFIG_CRYPTO_DES=y # Compression # # CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set # @@ -2444,10 +2514,12 @@ CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set CONFIG_HAVE_KVM=y +CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_VIRTUALIZATION=y # CONFIG_KVM is not set # CONFIG_VIRTIO_PCI is not set # CONFIG_VIRTIO_BALLOON is not set +CONFIG_BINARY_PRINTF=y # # Library routines @@ -2464,7 +2536,10 @@ CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y -CONFIG_PLIST=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..dcef387ddc36 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -830,4 +830,5 @@ ia32_sys_call_table: .quad sys_inotify_init1 .quad compat_sys_preadv .quad compat_sys_pwritev + .quad compat_sys_rt_tgsigqueueinfo /* 335 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index f6aa18eadf71..1a37bcdc8606 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/stddef.h> +#include <linux/stringify.h> #include <asm/asm.h> /* @@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {} const unsigned char *const *find_nop_table(void); +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + \ + "661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" + /* * Alternative instructions for different CPU types or capabilities. * @@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void); * without volatile and memory clobber. */ #define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature) : "memory") + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") /* * Alternative inline assembly with input. @@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void); * Best is to use constraints that are fixed size (like (%1) ... "r") * If you use variable sized constraints like "m" or "g" in the * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) /* * use this macro(s) if you need more than one output parameter diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index f712344329bc..262e02820049 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -27,6 +27,8 @@ extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b5..0c878caaa0a2 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -194,6 +194,27 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +extern bool amd_iommu_dump; +#define DUMP_printk(format, arg...) \ + do { \ + if (amd_iommu_dump) \ + printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ + } while(0); + +/* + * Make iterating over all IOMMUs easier + */ +#define for_each_iommu(iommu) \ + list_for_each_entry((iommu), &amd_iommu_list, list) +#define for_each_iommu_safe(iommu, next) \ + list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) + +#define APERTURE_RANGE_SHIFT 27 /* 128 MB */ +#define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) +#define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) +#define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ +#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) +#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) /* * This structure contains generic data for IOMMU protection domains @@ -210,6 +231,26 @@ struct protection_domain { }; /* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + /* address allocation bitmap */ + unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; + + unsigned long offset; +}; + +/* * Data container for a dma_ops specific protection domain */ struct dma_ops_domain { @@ -222,18 +263,10 @@ struct dma_ops_domain { unsigned long aperture_size; /* address we start to search for free addresses */ - unsigned long next_bit; - - /* address allocation bitmap */ - unsigned long *bitmap; + unsigned long next_address; - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 **pte_pages; + /* address space relevant data */ + struct aperture_range *aperture[APERTURE_MAX_RANGES]; /* This will be set to true when TLB needs to be flushed */ bool need_flush; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f8377422..bb7d47925847 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); -#define EIM_8BIT_APIC_ID 0 -#define EIM_32BIT_APIC_ID 1 +extern int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* @@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void) return val; } -extern int x2apic, x2apic_phys; +extern int x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); -extern void enable_IR_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); static inline int x2apic_enabled(void) { @@ -183,6 +181,8 @@ static inline int x2apic_enabled(void) return 1; return 0; } + +#define x2apic_supported() (cpu_has_x2apic) #else static inline void check_x2apic(void) { @@ -190,28 +190,20 @@ static inline void check_x2apic(void) static inline void enable_x2apic(void) { } -static inline void enable_IR_x2apic(void) -{ -} static inline int x2apic_enabled(void) { return 0; } -#define x2apic 0 - +#define x2apic_preenabled 0 +#define x2apic_supported() 0 #endif -extern int get_physical_broadcast(void); +extern void enable_IR_x2apic(void); -#ifdef CONFIG_X86_X2APIC -static inline void ack_x2APIC_irq(void) -{ - /* Docs say use 0 for future compatibility */ - native_apic_msr_write(APIC_EOI, 0); -} -#endif +extern int get_physical_broadcast(void); +extern void apic_disable(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC(void); @@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } static inline void disable_local_APIC(void) { } - +static inline void apic_disable(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_64 @@ -410,7 +402,7 @@ static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_XAPIC(ver)) + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) return (x >> 24) & 0xFF; else return (x >> 24) & 0x0F; @@ -478,6 +470,9 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); #ifdef CONFIG_X86_32 + +extern struct apic apic_default; + /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index bc9514fb3b13..7ddb36ab933b 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -22,6 +22,7 @@ # define APIC_INTEGRATED(x) (1) #endif #define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_EXT_SPACE(x) ((x) & 0x80000000) #define APIC_TASKPRI 0x80 #define APIC_TPRI_MASK 0xFFu #define APIC_ARBPRI 0x90 @@ -116,7 +117,9 @@ #define APIC_TDR_DIV_32 0x8 #define APIC_TDR_DIV_64 0x9 #define APIC_TDR_DIV_128 0xA -#define APIC_EILVT0 0x500 +#define APIC_EFEAT 0x400 +#define APIC_ECTRL 0x410 +#define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 #define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) @@ -125,9 +128,6 @@ #define APIC_EILVT_MSG_NMI 0x4 #define APIC_EILVT_MSG_EXT 0x7 #define APIC_EILVT_MASKED (1 << 16) -#define APIC_EILVT1 0x510 -#define APIC_EILVT2 0x520 -#define APIC_EILVT3 0x530 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define APIC_BASE_MSR 0x800 diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6ba23dd9fc92..418e632d4a80 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -8,11 +8,26 @@ #ifdef __KERNEL__ +#include <asm/page_types.h> + /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_x86_64 +#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) +#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + #ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 433adaebf9b6..1724e8de317c 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h @@ -50,7 +50,8 @@ struct setup_header { __u32 ramdisk_size; __u32 bootsect_kludge; __u16 heap_end_ptr; - __u16 _pad1; + __u8 ext_loader_ver; + __u8 ext_loader_type; __u32 cmd_line_ptr; __u32 initrd_addr_max; __u32 kernel_alignment; diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 222802029fa6..d96c1ee3a95c 100644 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h @@ -86,105 +86,7 @@ enum cpu_file_bit { CPU_VALUE_BIT, /* value */ }; -#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) - -/* - * DisplayFamily_DisplayModel Processor Families/Processor Number Series - * -------------------------- ------------------------------------------ - * 05_01, 05_02, 05_04 Pentium, Pentium with MMX - * - * 06_01 Pentium Pro - * 06_03, 06_05 Pentium II Xeon, Pentium II - * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III - * - * 06_09, 060D Pentium M - * - * 06_0E Core Duo, Core Solo - * - * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, - * Core 2 Quad, Core 2 Extreme, Core 2 Duo, - * Pentium dual-core - * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 - * - * 06_1C Atom - * - * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 - * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D - * - * 0F_06 Xeon 7100, 5000 Series, Xeon MP, - * Pentium 4, Pentium D - */ - -/* Register processors bits */ -enum cpu_processor_bit { - CPU_NONE, -/* Intel */ - CPU_INTEL_PENTIUM_BIT, - CPU_INTEL_P6_BIT, - CPU_INTEL_PENTIUM_M_BIT, - CPU_INTEL_CORE_BIT, - CPU_INTEL_CORE2_BIT, - CPU_INTEL_ATOM_BIT, - CPU_INTEL_XEON_P4_BIT, - CPU_INTEL_XEON_MP_BIT, -/* AMD */ - CPU_AMD_K6_BIT, - CPU_AMD_K7_BIT, - CPU_AMD_K8_BIT, - CPU_AMD_0F_BIT, - CPU_AMD_10_BIT, - CPU_AMD_11_BIT, -}; - -#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) -#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) -#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) -#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) -#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) -#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) -#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) -#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) - -#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) -#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) -#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) -#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) -#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) -#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) -#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) -#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) -#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) -#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) -#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) -#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) -#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) -#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) -#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) -#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) -#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) -#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) -#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) - -/* Select all supported Intel CPUs */ -#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) - -#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT) -#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT) -#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT) -#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT) -#define CPU_AMD_10 (1 << CPU_AMD_10_BIT) -#define CPU_AMD_11 (1 << CPU_AMD_11_BIT) - -#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11) -#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS) -#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS) -#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS) - -/* Select all supported AMD CPUs */ -#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) - -/* Select all supported CPUs */ -#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) +#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) #define MAX_CPU_FILES 512 @@ -220,7 +122,6 @@ struct cpu_debug_range { unsigned min; /* Register range min */ unsigned max; /* Register range max */ unsigned flag; /* Supported flags */ - unsigned model; /* Supported models */ }; #endif /* _ASM_X86_CPU_DEBUG_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb83b1c397aa..19af42138f78 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -22,7 +22,7 @@ #define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ #define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ #define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ +#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */ #define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ #define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ #define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ @@ -94,6 +94,7 @@ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ +#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -192,11 +193,11 @@ extern const char * const x86_power_flags[32]; #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) #define setup_clear_cpu_cap(bit) do { \ clear_cpu_cap(&boot_cpu_data, bit); \ - set_bit(bit, (unsigned long *)cleared_cpu_caps); \ + set_bit(bit, (unsigned long *)cpu_caps_cleared); \ } while (0) #define setup_force_cpu_cap(bit) do { \ set_cpu_cap(&boot_cpu_data, bit); \ - clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea49bd70..3bd1777a4c8b 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -63,7 +63,26 @@ extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); + +struct io_apic_irq_attr { + int ioapic; + int ioapic_pin; + int trigger; + int polarity; +}; + +static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, + int ioapic, int ioapic_pin, + int trigger, int polarity) +{ + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; +} + +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, + struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); @@ -78,7 +97,11 @@ extern void eisa_set_level_irq(unsigned int irq); /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_generic_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_IO_APIC +extern asmlinkage void smp_irq_move_cleanup_interrupt(void); +#endif #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 71c9e5183982..175adf58dd4f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) -#if 0 /* See comment in __save_init_fpu() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "m" (*fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); @@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); - else - return fxrstor_checking(&tsk->thread.xstate->fxsave); -} - /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes @@ -120,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in __fxsave_clear() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "0" (0)); @@ -185,12 +177,9 @@ static inline void tolerant_fwait(void) asm volatile("fnclex ; fwait"); } -static inline void restore_fpu(struct task_struct *tsk) +/* perform fxrstor iff the processor has extended states, otherwise frstor */ +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - if (task_thread_info(tsk)->status & TS_XSAVE) { - xrstor_checking(&tsk->thread.xstate->xsave); - return; - } /* * The "nop" is needed to make the instructions the same * length. @@ -199,7 +188,9 @@ static inline void restore_fpu(struct task_struct *tsk) "nop ; frstor %1", "fxrstor %1", X86_FEATURE_FXSR, - "m" (tsk->thread.xstate->fxsave)); + "m" (*fx)); + + return 0; } /* We need a safe address that is cheap to find and that is already @@ -262,6 +253,14 @@ end: #endif /* CONFIG_X86_64 */ +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} + /* * Signal frame handlers... */ @@ -305,18 +304,18 @@ static inline void kernel_fpu_end(void) /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context aswell. To prevent these kernel instructions - * in interrupt context interact wrongly with other user/kernel fpu usage, we + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we * should use them only in the context of irq_ts_save/restore() */ static inline int irq_ts_save(void) { /* - * If we are in process context, we are ok to take a spurious DNA fault. - * Otherwise, doing clts() in process context require pre-emption to - * be disabled or some heavy lifting like kernel_fpu_begin() + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() */ - if (!in_interrupt()) + if (!in_atomic()) return 0; if (read_cr0() & X86_CR0_TS) { diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 1a99e6c092af..58d7091eeb1f 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip; extern void mask_8259A(void); extern void unmask_8259A(void); -#ifdef CONFIG_X86_32 -extern void init_ISA_irqs(void); -#endif - #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e436010..daf866ed0612 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -154,22 +154,19 @@ extern int timer_through_8259; extern int io_apic_get_unique_id(int ioapic, int apic_id); extern int io_apic_get_version(int ioapic); extern int io_apic_get_redir_entries(int ioapic); -extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, - int edge_level, int active_high_low); #endif /* CONFIG_ACPI */ +struct io_apic_irq_attr; +extern int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr); extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); -#ifdef CONFIG_X86_64 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); -extern void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries); -#endif extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 86af26091d6c..0e9fe1d9d971 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_IOMAP_H +#define _ASM_X86_IOMAP_H + /* * Copyright © 2008 Ingo Molnar * @@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); + +#endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0396760fccb8..f275e2244505 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,6 +1,6 @@ #ifndef _ASM_X86_IRQ_REMAPPING_H #define _ASM_X86_IRQ_REMAPPING_H -#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) +#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..910b5a3d6751 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -34,6 +34,7 @@ #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 +# define IA32_SYSCALL_VECTOR 0x80 #else # define IA32_SYSCALL_VECTOR 0x80 #endif diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index 54c8cc53b24d..c2d1f3b58e5f 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -12,4 +12,17 @@ extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); +#ifdef CONFIG_K8_NB +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; +} +#else +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return NULL; +} +#endif + + #endif /* _ASM_X86_K8_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c882664716c1..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -9,20 +9,31 @@ struct cpu_signature { struct device; +enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; + struct microcode_ops { - int (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - int (*request_microcode_fw) (int cpu, struct device *device); + enum ucode_state (*request_microcode_user) (int cpu, + const void __user *buf, size_t size); - void (*apply_microcode) (int cpu); + enum ucode_state (*request_microcode_fw) (int cpu, + struct device *device); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); void (*microcode_fini_cpu) (int cpu); + + /* + * The generic 'microcode_core' part guarantees that + * the callbacks below run on a target cpu when they + * are being called. + * See also the "Synchronization" section in microcode_core.c. + */ + int (*apply_microcode) (int cpu); + int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; struct ucode_cpu_info { - struct cpu_signature cpu_sig; - int valid; - void *mc; + struct cpu_signature cpu_sig; + int valid; + void *mc; }; extern struct ucode_cpu_info ucode_cpu_info[]; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 642fc7fc8cdc..e2a1bb6d71ea 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -61,9 +61,11 @@ extern void get_smp_config(void); #ifdef CONFIG_X86_MPPARSE extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); +extern int enable_update_mptable; #else static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } +#define enable_update_mptable 0 #endif void __cpuinit generic_processor_info(int apicid, int version); @@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); -extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +struct device; +extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, + int active_high_low); extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC -extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, - u32 gsi, int triggering, int polarity); extern int mp_find_ioapic(int gsi); extern int mp_find_ioapic_pin(int ioapic, int gsi); -#else -static inline int -mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, - u32 gsi, int triggering, int polarity) -{ - return 0; -} #endif #else /* !CONFIG_ACPI: */ static inline int acpi_probe_gsi(void) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec41fc16c167..4d58d04fca83 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,7 +121,6 @@ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d #define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_HWCR 0xc0010015 #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c45a0a568dff..c97264409934 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void) * but since they are power of two we could use a * cheaper way --cvg */ - return nmi_watchdog & 0x3; + return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); } #endif diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cbe..c4ae822e415f 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, extern void numa_init_array(void); extern int numa_off; -extern void srat_reserve_add_area(int nodeid); -extern int hotadd_percent; - extern s16 apicid_to_node[MAX_LOCAL_APIC]; extern unsigned long numa_free_all_bootmem(void); @@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); #ifdef CONFIG_NUMA +/* + * Too small node sizes may confuse the VM badly. Usually they + * result from BIOS bugs. So dont recognize nodes as standalone + * NUMA entities that have less than this amount of RAM listed: + */ +#define NODE_MIN_SIZE (4*1024*1024) + extern void __init init_cpu_to_node(void); extern void __cpuinit numa_set_node(int cpu, int node); extern void __cpuinit numa_clear_node(int cpu); diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0f915ae649a7..6f1b7331313f 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); -extern void initmem_init(unsigned long, unsigned long); -extern void free_initmem(void); extern void setup_bootmem_allocator(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d38c91b70248..8d382d3abf38 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -32,22 +32,14 @@ */ #define __PAGE_OFFSET _AC(0xffff880000000000, UL) -#define __PHYSICAL_START CONFIG_PHYSICAL_START -#define __KERNEL_ALIGN 0x200000 - -/* - * Make sure kernel is aligned to 2MB address. Catching it at compile - * time is better. Change your config file and compile the kernel - * for a 2MB aligned address (CONFIG_PHYSICAL_START) - */ -#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 -#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" -#endif +#define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \ + (CONFIG_PHYSICAL_ALIGN - 1)) & \ + ~(CONFIG_PHYSICAL_ALIGN - 1)) #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 48 @@ -71,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long); #define vmemmap ((struct page *)VMEMMAP_START) -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - -extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); -extern void free_initmem(void); - extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 826ad37006ab..6473f5ccff85 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); + +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 378e3691c08c..4fb37c8a0832 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -56,6 +56,7 @@ struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; +struct task_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -203,7 +204,8 @@ struct pv_cpu_ops { void (*swapgs)(void); - struct pv_lazy_ops lazy_mode; + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); }; struct pv_irq_ops { @@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode { }; enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_enter_lazy_cpu(void); -void paravirt_leave_lazy_cpu(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); -void paravirt_leave_lazy(enum paravirt_lazy_mode mode); -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -static inline void arch_enter_lazy_cpu_mode(void) +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); } -static inline void arch_leave_lazy_cpu_mode(void) +static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); } -void arch_flush_lazy_cpu_mode(void); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { @@ -1443,7 +1443,7 @@ u64 _paravirt_ident_64(u64); #define paravirt_nop ((void *)_paravirt_nop) -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static inline int __raw_spin_is_locked(struct raw_spinlock *lock) { diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index aee103b26d01..02ecb30982a3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -82,22 +82,22 @@ do { \ case 1: \ asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)val)); \ + : "qi" ((T__)(val))); \ break; \ case 2: \ asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)val)); \ + : "ri" ((T__)(val))); \ break; \ case 4: \ asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)val)); \ + : "ri" ((T__)(val))); \ break; \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "re" ((T__)val)); \ + : "re" ((T__)(val))); \ break; \ default: __bad_percpu_size(); \ } \ @@ -109,7 +109,7 @@ do { \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=q" (ret__) \ : "m" (var)); \ break; \ case 2: \ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 29d96d168bc0..18ef7ebf2631 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) +#define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT */ /* @@ -503,6 +505,8 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ +extern int direct_gbpages; + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6b87bc6d5018..abde308fdb0f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[]; extern void paging_init(void); -#endif /* !__ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ - #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", \ __FILE__, __LINE__, &(e), pte_val(e)) @@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define update_mmu_cache(vma, address, pte) do { } while (0) -extern int direct_gbpages; - /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index fbf42b8e0383..766ea16fbbbd 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) - +/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc20000000000, UL) -#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffe20000000000, UL) +#define VMALLOC_START _AC(0xffffc90000000000, UL) +#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffea0000000000, UL) #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b8238dc8786d..4d258ad76a0f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,7 +273,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern int nx_enabled; -extern void set_nx(void); #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b2fab0051e0..c7768269b1cf 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -135,7 +135,8 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern struct tss_struct doublefault_tss; -extern __u32 cleared_cpu_caps[NCAPINTS]; +extern __u32 cpu_caps_cleared[NCAPINTS]; +extern __u32 cpu_caps_set[NCAPINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); @@ -409,9 +410,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern unsigned short num_cache_leaves; struct thread_struct { /* Cached TLS descriptors: */ @@ -427,8 +425,12 @@ struct thread_struct { unsigned short fsindex; unsigned short gsindex; #endif +#ifdef CONFIG_X86_32 unsigned long ip; +#endif +#ifdef CONFIG_X86_64 unsigned long fs; +#endif unsigned long gs; /* Hardware debugging registers: */ unsigned long debugreg0; @@ -835,6 +837,7 @@ extern unsigned int BIOS_revision; /* Boot loader type from the setup header: */ extern int bootloader_type; +extern int bootloader_version; extern char ignore_fpu_irq; @@ -895,7 +898,6 @@ static inline void spin_lock_prefetch(const void *x) .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ - .fs = __KERNEL_PERCPU, \ } /* diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5cdd19f20b5b..0f0d908349aa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -187,14 +187,15 @@ static inline int v8086_mode(struct pt_regs *regs) /* * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode - * when it traps. So regs will be the current sp. + * when it traps. The previous stack will be directly underneath the saved + * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. * * This is valid only for kernel mode traps. */ -static inline unsigned long kernel_trap_sp(struct pt_regs *regs) +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { #ifdef CONFIG_X86_32 - return (unsigned long)regs; + return (unsigned long)(®s->sp); #else return regs->sp; #endif diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dddfd58..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -48,9 +48,15 @@ #endif #ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT +/* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index bdc2ada05ae0..4093d1ed6db2 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -33,7 +33,6 @@ struct x86_quirks { int (*setup_ioapic_ids)(void); }; -extern void x86_quirk_pre_intr_init(void); extern void x86_quirk_intr_init(void); extern void x86_quirk_trap_init(void); diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 19e0d88b966d..6a84ed166aec 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void); static inline int logical_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); + return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); } #endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index e3cc3c063ec5..4517d6b93188 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,7 +27,7 @@ #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ # define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ +# define MAX_PHYSMEM_BITS 46 #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index e5e6caffec87..b7e5db876399 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -172,7 +172,7 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; } -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_SPINLOCKS static inline int __raw_spin_is_locked(raw_spinlock_t *lock) { @@ -206,7 +206,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, __raw_spin_lock(lock); } -#endif +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 7043408f6904..372b76edd63f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -1,7 +1,7 @@ /* * syscalls.h - Linux syscall interfaces (arch-specific) * - * Copyright (c) 2008 Jaswinder Singh + * Copyright (c) 2008 Jaswinder Singh Rajput * * This file is released under the GPLv2. * See the file COPYING for more details. @@ -12,50 +12,55 @@ #include <linux/compiler.h> #include <linux/linkage.h> -#include <linux/types.h> #include <linux/signal.h> +#include <linux/types.h> /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +/* kernel/process.c */ +int sys_fork(struct pt_regs *); +int sys_vfork(struct pt_regs *); + /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +/* kernel/signal.c */ +long sys_rt_sigreturn(struct pt_regs *); + /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 +/* kernel/ioport.c */ +long sys_iopl(struct pt_regs *); + /* kernel/process_32.c */ -int sys_fork(struct pt_regs *); int sys_clone(struct pt_regs *); -int sys_vfork(struct pt_regs *); int sys_execve(struct pt_regs *); -/* kernel/signal_32.c */ +/* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); - -/* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ +struct mmap_arg_struct; +struct sel_arg_struct; +struct oldold_utsname; +struct old_utsname; + asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct mmap_arg_struct; asmlinkage int old_mmap(struct mmap_arg_struct __user *); -struct sel_arg_struct; asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); -struct old_utsname; asmlinkage int sys_uname(struct old_utsname __user *); -struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ @@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + /* kernel/process_64.c */ -asmlinkage long sys_fork(struct pt_regs *); asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -asmlinkage long sys_vfork(struct pt_regs *); asmlinkage long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); long sys_arch_prctl(int, unsigned long); -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - -/* kernel/signal_64.c */ +/* kernel/signal.c */ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ +struct new_utsname; + asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct new_utsname; asmlinkage long sys_uname(struct new_utsname __user *); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8820a73ae090..602c769fc98c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -94,7 +94,8 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ +#define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -116,6 +117,7 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) /* work to do in syscall_trace_enter() */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f44b49abca49..066ef590d7e0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -203,7 +203,8 @@ struct pci_bus; void x86_pci_root_bus_res_quirks(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids) +#define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ + (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) #define smt_capable() (smp_num_siblings > 1) #endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..bfd74c032fca 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -2,6 +2,7 @@ #define _ASM_X86_TRAPS_H #include <asm/debugreg.h> +#include <asm/siginfo.h> /* TRAP_TRACE, ... */ #ifdef CONFIG_X86_32 #define dotraplinkage @@ -13,6 +14,9 @@ asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); asmlinkage void int3(void); +asmlinkage void xen_debug(void); +asmlinkage void xen_int3(void); +asmlinkage void xen_stack_segment(void); asmlinkage void overflow(void); asmlinkage void bounds(void); asmlinkage void invalid_op(void); @@ -74,7 +78,6 @@ static inline int get_si_code(unsigned long condition) } extern int panic_on_unrecovered_nmi; -extern int kstack_depth_to_print; void math_error(void __user *); void math_emulate(struct math_emu_info *); diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..708dae61262d 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -340,6 +340,7 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..4e2b05404400 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) __SYSCALL(__NR_preadv, sys_preadv) #define __NR_pwritev 296 __SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 297 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #ifndef __NO_STUBS diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 9b0e61bf7a88..bddd44f2f0ab 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -37,7 +37,7 @@ #define UV_CPUS_PER_ACT_STATUS 32 #define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_SIZE 2 -#define UV_ACTIVATION_DESCRIPTOR_SIZE 32 +#define UV_ADP_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 #define UV_NET_ENDPOINT_INTD 0x38 diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3a98ea1062e..341070f7ad5c 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -133,6 +133,7 @@ struct uv_scir_s { struct uv_hub_info_s { unsigned long global_mmr_base; unsigned long gpa_mask; + unsigned int gnode_extra; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; @@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); * p - PNODE (local part of nsids, right shifted 1) */ #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) -#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) +#define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) +#define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1) #define UV_LOCAL_MMR_BASE 0xf4000000UL #define UV_GLOBAL_MMR32_BASE 0xf8000000UL @@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ - ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) #define UV_APIC_PNODE_SHIFT 6 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77df4d654ff9..4f78bd682125 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o +obj-y += setup.o i8259.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -90,7 +90,8 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 723989d7f802..631086159c53 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include <linux/bootmem.h> #include <linux/ioport.h> +#include <linux/pci.h> #include <asm/pgtable.h> #include <asm/io_apic.h> @@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) * Make sure all (legacy) PCI IRQs are set as level-triggered. */ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - if (triggering == ACPI_LEVEL_SENSITIVE) + if (trigger == ACPI_LEVEL_SENSITIVE) eisa_set_level_irq(gsi); } #endif #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, triggering, polarity); + plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); @@ -903,10 +904,8 @@ extern int es7000_plat; #endif static struct { - int apic_id; int gsi_base; int gsi_end; - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; int mp_find_ioapic(int gsi) @@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); mp_ioapics[idx].apicid = uniq_ioapic_id(id); -#ifdef CONFIG_X86_32 mp_ioapics[idx].apicver = io_apic_get_version(idx); -#else - mp_ioapics[idx].apicver = 0; -#endif + /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); @@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void) } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, + int polarity) { +#ifdef CONFIG_X86_MPPARSE + struct mpc_intsrc mp_irq; + struct pci_dev *pdev; + unsigned char number; + unsigned int devfn; int ioapic; - int ioapic_pin; -#ifdef CONFIG_X86_32 -#define MAX_GSI_NUM 4096 -#define IRQ_COMPRESSION_START 64 + u8 pin; - static int pci_irq = IRQ_COMPRESSION_START; - /* - * Mapping between Global System Interrupts, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; -#else + if (!acpi_ioapic) + return 0; + if (!dev) + return 0; + if (dev->bus != &pci_bus_type) + return 0; + + pdev = to_pci_dev(dev); + number = pdev->bus->number; + devfn = pdev->devfn; + pin = pdev->pin; + /* print the entry should happen on mptable identically */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); + mp_irq.dstapic = mp_ioapics[ioapic].apicid; + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); + + save_mp_irq(&mp_irq); +#endif + return 0; +} + +int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + int ioapic; + int ioapic_pin; + struct io_apic_irq_attr irq_attr; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; -#endif /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) @@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) gsi = ioapic_renumber_irq(ioapic, gsi); #endif - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ if (ioapic_pin > MP_MAX_IOAPIC_PIN) { printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, + "%d-%d\n", mp_ioapics[ioapic].apicid, ioapic_pin); return gsi; } - if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); -#ifdef CONFIG_X86_32 - return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); -#else - return gsi; -#endif - } - - set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); -#ifdef CONFIG_X86_32 - /* - * For GSI >= 64, use IRQ compression - */ - if ((gsi >= IRQ_COMPRESSION_START) - && (triggering == ACPI_LEVEL_SENSITIVE)) { - /* - * For PCI devices assign IRQs in order, avoiding gaps - * due to unused I/O APIC pins. - */ - int irq = gsi; - if (gsi < MAX_GSI_NUM) { - /* - * Retain the VIA chipset work-around (gsi > 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } -#endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} -int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, - u32 gsi, int triggering, int polarity) -{ -#ifdef CONFIG_X86_MPPARSE - struct mpc_intsrc mp_irq; - int ioapic; + if (enable_update_mptable) + mp_config_acpi_gsi(dev, gsi, trigger, polarity); - if (!acpi_ioapic) - return 0; + set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, + trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + io_apic_set_pci_routing(dev, gsi, &irq_attr); - /* print the entry should happen on mptable identically */ - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | - (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.srcbus = number; - mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); - ioapic = mp_find_ioapic(gsi); - mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); - - save_mp_irq(&mp_irq); -#endif - return 0; + return gsi; } /* diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 1c31cc0e9def..167bc16ce0e5 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -9,7 +9,7 @@ always := wakeup.bin targets := wakeup.elf wakeup.lds -wakeup-y += wakeup.o wakemain.o video-mode.o copy.o +wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S new file mode 100644 index 000000000000..f51eb0bb56ce --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/bioscall.S @@ -0,0 +1 @@ +#include "../../../boot/bioscall.S" diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c new file mode 100644 index 000000000000..6206033ba202 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/regs.c @@ -0,0 +1 @@ +#include "../../../boot/regs.c" diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad52..1c60554537c3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,7 +55,16 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); static struct dma_ops_domain *find_protection_domain(u16 devid); +static u64* alloc_pte(struct protection_domain *dom, + unsigned long address, u64 + **pte_page, gfp_t gfp); +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages); +#ifndef BUS_NOTIFY_UNBOUND_DRIVER +#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 +#endif #ifdef CONFIG_AMD_IOMMU_STATS @@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) { struct amd_iommu *iommu; - list_for_each_entry(iommu, &amd_iommu_list, list) + for_each_iommu(iommu) iommu_poll_events(iommu); return IRQ_HANDLED; @@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid) __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, domid, 1, 1); - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { spin_lock_irqsave(&iommu->lock, flags); __iommu_queue_command(iommu, &cmd); __iommu_completion_wait(iommu); @@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid) } } +void amd_iommu_flush_all_domains(void) +{ + int i; + + for (i = 1; i < MAX_DOMAIN_ID; ++i) { + if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + continue; + iommu_flush_domain(i); + } +} + +void amd_iommu_flush_all_devices(void) +{ + struct amd_iommu *iommu; + int i; + + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (amd_iommu_pd_table[i] == NULL) + continue; + + iommu = amd_iommu_rlookup_table[i]; + if (!iommu) + continue; + + iommu_queue_inv_dev_entry(iommu, i); + iommu_completion_wait(iommu); + } +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom, unsigned long phys_addr, int prot) { - u64 __pte, *pte, *page; + u64 __pte, *pte; bus_addr = PAGE_ALIGN(bus_addr); phys_addr = PAGE_ALIGN(phys_addr); @@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom, if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) return -EINVAL; - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - *pte = IOMMU_L2_PDE(virt_to_phys(page)); - } - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - *pte = IOMMU_L1_PDE(virt_to_phys(page)); - } - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; + pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); if (IOMMU_PTE_PRESENT(*pte)) return -EBUSY; @@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, * as allocated in the aperture */ if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); + __set_bit(addr >> PAGE_SHIFT, + dma_dom->aperture[0]->bitmap); } return 0; @@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, ****************************************************************************/ /* - * The address allocator core function. + * The address allocator core functions. * * called with domain->lock held */ + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64* fetch_pte(struct protection_domain *domain, + unsigned long address) +{ + u64 *pte; + + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + + return pte; +} + +/* + * This function is used to add a new aperture range to an existing + * aperture in case of dma_ops domain allocation or address allocation + * failure. + */ +static int alloc_new_range(struct amd_iommu *iommu, + struct dma_ops_domain *dma_dom, + bool populate, gfp_t gfp) +{ + int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i; + +#ifdef CONFIG_IOMMU_STRESS + populate = false; +#endif + + if (index >= APERTURE_MAX_RANGES) + return -ENOMEM; + + dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); + if (!dma_dom->aperture[index]) + return -ENOMEM; + + dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); + if (!dma_dom->aperture[index]->bitmap) + goto out_free; + + dma_dom->aperture[index]->offset = dma_dom->aperture_size; + + if (populate) { + unsigned long address = dma_dom->aperture_size; + int i, num_ptes = APERTURE_RANGE_PAGES / 512; + u64 *pte, *pte_page; + + for (i = 0; i < num_ptes; ++i) { + pte = alloc_pte(&dma_dom->domain, address, + &pte_page, gfp); + if (!pte) + goto out_free; + + dma_dom->aperture[index]->pte_pages[i] = pte_page; + + address += APERTURE_RANGE_SIZE / 64; + } + } + + dma_dom->aperture_size += APERTURE_RANGE_SIZE; + + /* Intialize the exclusion range if necessary */ + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset && + iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } + + /* + * Check for areas already mapped as present in the new aperture + * range and mark those pages as reserved in the allocator. Such + * mappings may already exist as a result of requested unity + * mappings for devices. + */ + for (i = dma_dom->aperture[index]->offset; + i < dma_dom->aperture_size; + i += PAGE_SIZE) { + u64 *pte = fetch_pte(&dma_dom->domain, i); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + continue; + + dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); + } + + return 0; + +out_free: + free_page((unsigned long)dma_dom->aperture[index]->bitmap); + + kfree(dma_dom->aperture[index]); + dma_dom->aperture[index] = NULL; + + return -ENOMEM; +} + +static unsigned long dma_ops_area_alloc(struct device *dev, + struct dma_ops_domain *dom, + unsigned int pages, + unsigned long align_mask, + u64 dma_mask, + unsigned long start) +{ + unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; + int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; + int i = start >> APERTURE_RANGE_SHIFT; + unsigned long boundary_size; + unsigned long address = -1; + unsigned long limit; + + next_bit >>= PAGE_SHIFT; + + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; + + for (;i < max_index; ++i) { + unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; + + if (dom->aperture[i]->offset >= dma_mask) + break; + + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, + dma_mask >> PAGE_SHIFT); + + address = iommu_area_alloc(dom->aperture[i]->bitmap, + limit, next_bit, pages, 0, + boundary_size, align_mask); + if (address != -1) { + address = dom->aperture[i]->offset + + (address << PAGE_SHIFT); + dom->next_address = address + (pages << PAGE_SHIFT); + break; + } + + next_bit = 0; + } + + return address; +} + static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, unsigned long align_mask, u64 dma_mask) { - unsigned long limit; unsigned long address; - unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, - dma_mask >> PAGE_SHIFT); +#ifdef CONFIG_IOMMU_STRESS + dom->next_address = 0; + dom->need_flush = true; +#endif - if (dom->next_bit >= limit) { - dom->next_bit = 0; - dom->need_flush = true; - } + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, dom->next_address); - address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, - 0 , boundary_size, align_mask); if (address == -1) { - address = iommu_area_alloc(dom->bitmap, limit, 0, pages, - 0, boundary_size, align_mask); + dom->next_address = 0; + address = dma_ops_area_alloc(dev, dom, pages, align_mask, + dma_mask, 0); dom->need_flush = true; } - if (likely(address != -1)) { - dom->next_bit = address + pages; - address <<= PAGE_SHIFT; - } else + if (unlikely(address == -1)) address = bad_dma_address; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, unsigned long address, unsigned int pages) { - address >>= PAGE_SHIFT; - iommu_area_free(dom->bitmap, address, pages); + unsigned i = address >> APERTURE_RANGE_SHIFT; + struct aperture_range *range = dom->aperture[i]; - if (address >= dom->next_bit) + BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); + +#ifdef CONFIG_IOMMU_STRESS + if (i < 4) + return; +#endif + + if (address >= dom->next_address) dom->need_flush = true; + + address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + + iommu_area_free(range->bitmap, address, pages); + } /**************************************************************************** @@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages) { - unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; if (start_page + pages > last_page) pages = last_page - start_page; - iommu_area_reserve(dom->bitmap, start_page, pages); + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } } static void free_pagetable(struct protection_domain *domain) @@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain) */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { + int i; + if (!dom) return; free_pagetable(&dom->domain); - kfree(dom->pte_pages); - - kfree(dom->bitmap); + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { + if (!dom->aperture[i]) + continue; + free_page((unsigned long)dom->aperture[i]->bitmap); + kfree(dom->aperture[i]); + } kfree(dom); } @@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, - unsigned order) +static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) { struct dma_ops_domain *dma_dom; - unsigned i, num_pte_pages; - u64 *l2_pde; - u64 address; - - /* - * Currently the DMA aperture must be between 32 MB and 1GB in size - */ - if ((order < 25) || (order > 30)) - return NULL; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) @@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; - dma_dom->aperture_size = (1ULL << order); - dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), - GFP_KERNEL); - if (!dma_dom->bitmap) - goto free_dma_dom; - /* - * mark the first page as allocated so we never return 0 as - * a valid dma-address. So we can use 0 as error value - */ - dma_dom->bitmap[0] = 1; - dma_dom->next_bit = 0; dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; - /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } + if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) + goto free_dma_dom; /* - * At the last step, build the page tables so we don't need to - * allocate page table pages in the dma_ops mapping/unmapping - * path. + * mark the first page as allocated so we never return 0 as + * a valid dma-address. So we can use 0 as error value */ - num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); - dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), - GFP_KERNEL); - if (!dma_dom->pte_pages) - goto free_dma_dom; - - l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); - if (l2_pde == NULL) - goto free_dma_dom; + dma_dom->aperture[0]->bitmap[0] = 1; + dma_dom->next_address = 0; - dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); - - for (i = 0; i < num_pte_pages; ++i) { - dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); - if (!dma_dom->pte_pages[i]) - goto free_dma_dom; - address = virt_to_phys(dma_dom->pte_pages[i]); - l2_pde[i] = IOMMU_L1_PDE(address); - } return dma_dom; @@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb, struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; unsigned long flags; if (devid > amd_iommu_last_bdf) @@ -1002,17 +1145,7 @@ static int device_change_notifier(struct notifier_block *nb, "to a non-dma-ops domain\n", dev_name(dev)); switch (action) { - case BUS_NOTIFY_BOUND_DRIVER: - if (domain) - goto out; - dma_domain = find_protection_domain(devid); - if (!dma_domain) - dma_domain = iommu->default_dom; - attach_device(iommu, &dma_domain->domain, devid); - printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device %s\n", dma_domain->domain.id, dev_name(dev)); - break; - case BUS_NOTIFY_UNBIND_DRIVER: + case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; detach_device(domain, devid); @@ -1022,7 +1155,7 @@ static int device_change_notifier(struct notifier_block *nb, dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu, order); + dma_domain = dma_ops_domain_alloc(iommu); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -1133,8 +1266,8 @@ static int get_device_resources(struct device *dev, dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; attach_device(*iommu, *domain, *bdf); - printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device %s\n", (*domain)->id, dev_name(dev)); + DUMP_printk("Using protection domain %d for device %s\n", + (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) @@ -1144,6 +1277,66 @@ static int get_device_resources(struct device *dev, } /* + * If the pte_page is not yet allocated this function is called + */ +static u64* alloc_pte(struct protection_domain *dom, + unsigned long address, u64 **pte_page, gfp_t gfp) +{ + u64 *pte, *page; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = IOMMU_L2_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = IOMMU_L1_PDE(virt_to_phys(page)); + } + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page) + *pte_page = pte; + + pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + + return pte; +} + +/* + * This function fetches the PTE for a given address in the aperture + */ +static u64* dma_ops_get_pte(struct dma_ops_domain *dom, + unsigned long address) +{ + struct aperture_range *aperture; + u64 *pte, *pte_page; + + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return NULL; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) { + pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); + aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; + } else + pte += IOMMU_PTE_L0_INDEX(address); + + return pte; +} + +/* * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. */ @@ -1159,8 +1352,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, paddr &= PAGE_MASK; - pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; - pte += IOMMU_PTE_L0_INDEX(address); + pte = dma_ops_get_pte(dom, address); + if (!pte) + return bad_dma_address; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1185,14 +1379,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address) { + struct aperture_range *aperture; u64 *pte; if (address >= dom->aperture_size) return; - WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); + aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; + if (!aperture) + return; + + pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; + if (!pte) + return; - pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); WARN_ON(!*pte); @@ -1216,7 +1416,7 @@ static dma_addr_t __map_single(struct device *dev, u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; - dma_addr_t address, start; + dma_addr_t address, start, ret; unsigned int pages; unsigned long align_mask = 0; int i; @@ -1232,14 +1432,33 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; +retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) - goto out; + if (unlikely(address == bad_dma_address)) { + /* + * setting next_address here will let the address + * allocator only scan the new allocated range in the + * first run. This is a small optimization. + */ + dma_dom->next_address = dma_dom->aperture_size; + + if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + goto out; + + /* + * aperture was sucessfully enlarged by 128 MB, try + * allocation again + */ + goto retry; + } start = address; for (i = 0; i < pages; ++i) { - dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + if (ret == bad_dma_address) + goto out_unmap; + paddr += PAGE_SIZE; start += PAGE_SIZE; } @@ -1255,6 +1474,17 @@ static dma_addr_t __map_single(struct device *dev, out: return address; + +out_unmap: + + for (--i; i >= 0; --i) { + start -= PAGE_SIZE; + dma_ops_domain_unmap(iommu, dma_dom, start); + } + + dma_ops_free_addresses(dma_dom, address, pages); + + return bad_dma_address; } /* @@ -1537,8 +1767,10 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) + if (*dma_addr == bad_dma_address) { + spin_unlock_irqrestore(&domain->lock, flags); goto out_free; + } iommu_completion_wait(iommu); @@ -1625,7 +1857,6 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { @@ -1638,7 +1869,7 @@ static void prealloc_protection_domains(void) iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - dma_dom = dma_ops_domain_alloc(iommu, order); + dma_dom = dma_ops_domain_alloc(iommu); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); @@ -1664,7 +1895,6 @@ static struct dma_map_ops amd_iommu_dma_ops = { int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; - int order = amd_iommu_aperture_order; int ret; /* @@ -1672,8 +1902,8 @@ int __init amd_iommu_init_dma_ops(void) * found in the system. Devices not assigned to any other * protection domain will be assigned to the default one. */ - list_for_each_entry(iommu, &amd_iommu_list, list) { - iommu->default_dom = dma_ops_domain_alloc(iommu, order); + for_each_iommu(iommu) { + iommu->default_dom = dma_ops_domain_alloc(iommu); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; @@ -1710,7 +1940,7 @@ int __init amd_iommu_init_dma_ops(void) free_domains: - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { if (iommu->default_dom) dma_ops_domain_free(iommu->default_dom); } @@ -1842,7 +2072,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, old_domain = domain_for_device(devid); if (old_domain) - return -EBUSY; + detach_device(old_domain, devid); attach_device(iommu, domain, devid); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902dac..238989ec077d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -115,15 +115,21 @@ struct ivmd_header { u64 range_length; } __attribute__((packed)); +bool amd_iommu_dump; + static int __initdata amd_iommu_detected; u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ +#ifdef CONFIG_IOMMU_STRESS +bool amd_iommu_isolate = false; +#else bool amd_iommu_isolate = true; /* if true, device isolation is enabled */ +#endif + bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid) static inline unsigned long tbl_size(int entry_size) { unsigned shift = PAGE_SHIFT + - get_order(amd_iommu_last_bdf * entry_size); + get_order(((int)amd_iommu_last_bdf + 1) * entry_size); return 1UL << shift; } @@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size) * This function set the exclusion range in the IOMMU. DMA accesses to the * exclusion range are passed through untranslated */ -static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) +static void iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; @@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) } /* Generic functions to enable/disable certain features of the IOMMU. */ -static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; @@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -static void __init iommu_enable(struct amd_iommu *iommu) +static void iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", dev_name(&iommu->dev->dev), iommu->cap_ptr); @@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } -/* Function to enable IOMMU event logging and event interrupts */ -static void __init iommu_enable_event_logging(struct amd_iommu *iommu) +static void iommu_disable(struct amd_iommu *iommu) { - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + iommu_feature_disable(iommu, CONTROL_IOMMU_EN); } /* @@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) { u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(CMD_BUFFER_SIZE)); - u64 entry; if (cmd_buf == NULL) return NULL; iommu->cmd_buf_size = CMD_BUFFER_SIZE; - entry = (u64)virt_to_phys(cmd_buf); + return cmd_buf; +} + +/* + * This function writes the command buffer address to the hardware and + * enables it. + */ +static void iommu_enable_command_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->cmd_buf == NULL); + + entry = (u64)virt_to_phys(iommu->cmd_buf); entry |= MMIO_CMD_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); + &entry, sizeof(entry)); /* set head and tail to zero manually */ writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); - - return cmd_buf; } static void __init free_command_buffer(struct amd_iommu *iommu) @@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu) /* allocates the memory where the IOMMU will log its events to */ static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) { - u64 entry; iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(EVT_BUFFER_SIZE)); if (iommu->evt_buf == NULL) return NULL; + return iommu->evt_buf; +} + +static void iommu_enable_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + + BUG_ON(iommu->evt_buf == NULL); + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, &entry, sizeof(entry)); - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); } static void __init free_event_buffer(struct amd_iommu *iommu) @@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, p += sizeof(struct ivhd_header); end += h->length; + while (p < end) { e = (struct ivhd_entry *)p; switch (e->type) { case IVHD_DEV_ALL: + + DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" + " last device %02x:%02x.%x flags: %02x\n", + PCI_BUS(iommu->first_device), + PCI_SLOT(iommu->first_device), + PCI_FUNC(iommu->first_device), + PCI_BUS(iommu->last_device), + PCI_SLOT(iommu->last_device), + PCI_FUNC(iommu->last_device), + e->flags); + for (dev_i = iommu->first_device; dev_i <= iommu->last_device; ++dev_i) set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0); break; case IVHD_DEV_SELECT: + + DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " + "flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + devid = e->devid; set_dev_entry_from_acpi(iommu, devid, e->flags, 0); break; case IVHD_DEV_SELECT_RANGE_START: + + DUMP_printk(" DEV_SELECT_RANGE_START\t " + "devid: %02x:%02x.%x flags: %02x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags); + devid_start = e->devid; flags = e->flags; ext_flags = 0; alias = false; break; case IVHD_DEV_ALIAS: + + DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " + "flags: %02x devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + devid = e->devid; devid_to = e->ext >> 8; - set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: + + DUMP_printk(" DEV_ALIAS_RANGE\t\t " + "devid: %02x:%02x.%x flags: %02x " + "devid_to: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, + PCI_BUS(e->ext >> 8), + PCI_SLOT(e->ext >> 8), + PCI_FUNC(e->ext >> 8)); + devid_start = e->devid; flags = e->flags; devid_to = e->ext >> 8; @@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, alias = true; break; case IVHD_DEV_EXT_SELECT: + + DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " + "flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + devid = e->devid; set_dev_entry_from_acpi(iommu, devid, e->flags, e->ext); break; case IVHD_DEV_EXT_SELECT_RANGE: + + DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " + "%02x:%02x.%x flags: %02x ext: %08x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid), + e->flags, e->ext); + devid_start = e->devid; flags = e->flags; ext_flags = e->ext; alias = false; break; case IVHD_DEV_RANGE_END: + + DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS(e->devid), + PCI_SLOT(e->devid), + PCI_FUNC(e->devid)); + devid = e->devid; for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) @@ -679,7 +774,7 @@ static void __init free_iommu_all(void) { struct amd_iommu *iommu, *next; - list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { + for_each_iommu_safe(iommu, next) { list_del(&iommu->list); free_iommu_one(iommu); kfree(iommu); @@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->mmio_base) return -ENOMEM; - iommu_set_device_table(iommu); iommu->cmd_buf = alloc_command_buffer(iommu); if (!iommu->cmd_buf) return -ENOMEM; @@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table) h = (struct ivhd_header *)p; switch (*p) { case ACPI_IVHD_TYPE: + + DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " + "seg: %d flags: %01x info %04x\n", + PCI_BUS(h->devid), PCI_SLOT(h->devid), + PCI_FUNC(h->devid), h->cap_ptr, + h->pci_seg, h->flags, h->info); + DUMP_printk(" mmio-addr: %016llx\n", + h->mmio_phys); + iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); if (iommu == NULL) return -ENOMEM; @@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msix(struct amd_iommu *iommu) -{ - struct amd_iommu *curr; - struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ - int nvec = 0, i; - - list_for_each_entry(curr, &amd_iommu_list, list) { - if (curr->dev == iommu->dev) { - entries[nvec].entry = curr->evt_msi_num; - entries[nvec].vector = 0; - curr->int_enabled = true; - nvec++; - } - } - - if (pci_enable_msix(iommu->dev, entries, nvec)) { - pci_disable_msix(iommu->dev); - return 1; - } - - for (i = 0; i < nvec; ++i) { - int r = request_irq(entries->vector, amd_iommu_int_handler, - IRQF_SAMPLE_RANDOM, - "AMD IOMMU", - NULL); - if (r) - goto out_free; - } - - return 0; - -out_free: - for (i -= 1; i >= 0; --i) - free_irq(entries->vector, NULL); - - pci_disable_msix(iommu->dev); - - return 1; -} - static int __init iommu_setup_msi(struct amd_iommu *iommu) { int r; - struct amd_iommu *curr; - - list_for_each_entry(curr, &amd_iommu_list, list) { - if (curr->dev == iommu->dev) - curr->int_enabled = true; - } - if (pci_enable_msi(iommu->dev)) return 1; @@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) return 1; } + iommu->int_enabled = true; + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); + return 0; } -static int __init iommu_init_msi(struct amd_iommu *iommu) +static int iommu_init_msi(struct amd_iommu *iommu) { if (iommu->int_enabled) return 0; - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) - return iommu_setup_msix(iommu); - else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) return iommu_setup_msi(iommu); return 1; @@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) static int __init init_unity_map_range(struct ivmd_header *m) { struct unity_map_entry *e = 0; + char *s; e = kzalloc(sizeof(*e), GFP_KERNEL); if (e == NULL) @@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m) switch (m->type) { default: + kfree(e); + return 0; case ACPI_IVMD_TYPE: + s = "IVMD_TYPEi\t\t\t"; e->devid_start = e->devid_end = m->devid; break; case ACPI_IVMD_TYPE_ALL: + s = "IVMD_TYPE_ALL\t\t"; e->devid_start = 0; e->devid_end = amd_iommu_last_bdf; break; case ACPI_IVMD_TYPE_RANGE: + s = "IVMD_TYPE_RANGE\t\t"; e->devid_start = m->devid; e->devid_end = m->aux; break; @@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m) e->address_end = e->address_start + PAGE_ALIGN(m->range_length); e->prot = m->flags >> 1; + DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" + " range_start: %016llx range_end: %016llx flags: %x\n", s, + PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), + PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), + PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), + e->address_start, e->address_end, m->flags); + list_add_tail(&e->list, &amd_iommu_unity_map); return 0; @@ -967,18 +1037,28 @@ static void init_device_table(void) * This function finally enables all IOMMUs found in the system after * they have been initialized */ -static void __init enable_iommus(void) +static void enable_iommus(void) { struct amd_iommu *iommu; - list_for_each_entry(iommu, &amd_iommu_list, list) { + for_each_iommu(iommu) { + iommu_set_device_table(iommu); + iommu_enable_command_buffer(iommu); + iommu_enable_event_buffer(iommu); iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); - iommu_enable_event_logging(iommu); iommu_enable(iommu); } } +static void disable_iommus(void) +{ + struct amd_iommu *iommu; + + for_each_iommu(iommu) + iommu_disable(iommu); +} + /* * Suspend/Resume support * disable suspend until real resume implemented @@ -986,12 +1066,31 @@ static void __init enable_iommus(void) static int amd_iommu_resume(struct sys_device *dev) { + /* + * Disable IOMMUs before reprogramming the hardware registers. + * IOMMU is still enabled from the resume kernel. + */ + disable_iommus(); + + /* re-load the hardware */ + enable_iommus(); + + /* + * we have to flush after the IOMMUs are enabled because a + * disabled IOMMU will never execute the commands we send + */ + amd_iommu_flush_all_domains(); + amd_iommu_flush_all_devices(); + return 0; } static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) { - return -EINVAL; + /* disable IOMMUs to go out of the way for BIOS */ + disable_iommus(); + + return 0; } static struct sysdev_class amd_iommu_sysdev_class = { @@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void) enable_iommus(); - printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", - (1 << (amd_iommu_aperture_order-20))); - printk(KERN_INFO "AMD IOMMU: device isolation "); if (amd_iommu_isolate) printk("enabled\n"); @@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void) * ****************************************************************************/ +static int __init parse_amd_iommu_dump(char *str) +{ + amd_iommu_dump = true; + + return 1; +} + static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { @@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str) return 1; } -static int __init parse_amd_iommu_size_options(char *str) -{ - unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); - - if ((order > 24) && (order < 31)) - amd_iommu_aperture_order = order; - - return 1; -} - +__setup("amd_iommu_dump", parse_amd_iommu_dump); __setup("amd_iommu=", parse_amd_iommu_options); -__setup("amd_iommu_size=", parse_amd_iommu_size_options); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2870920f246..a4c9cf0bf70b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -98,6 +98,29 @@ early_param("lapic", parse_lapic); /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; +/* + * Handle interrupt mode configuration register (IMCR). + * This register controls whether the interrupt signals + * that reach the BSP come from the master PIC or from the + * local APIC. Before entering Symmetric I/O Mode, either + * the BIOS or the operating system must switch out of + * PIC Mode by changing the IMCR. + */ +static inline void imcr_pic_to_apic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go through APIC */ + outb(0x01, 0x23); +} + +static inline void imcr_apic_to_pic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go directly to BSP */ + outb(0x00, 0x23); +} #endif #ifdef CONFIG_X86_64 @@ -111,13 +134,19 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +int x2apic_mode; #ifdef CONFIG_X86_X2APIC -int x2apic; /* x2apic enabled before OS handover */ static int x2apic_preenabled; static int disable_x2apic; static __init int setup_nox2apic(char *str) { + if (x2apic_enabled()) { + pr_warning("Bios already enabled x2apic, " + "can't enforce nox2apic"); + return 0; + } + disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; @@ -209,6 +238,31 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * bare function to substitute write operation + * and it's _that_ fast :) + */ +static void native_apic_write_dummy(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +static u32 native_apic_read_dummy(u32 reg) +{ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +/* + * right after this call apic->write/read doesn't do anything + * note that there is no restore operation it works one way + */ +void apic_disable(void) +{ + apic->read = native_apic_read_dummy; + apic->write = native_apic_write_dummy; +} + void native_apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -348,7 +402,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) { - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); unsigned int v = (mask << 16) | (msg_type << 8) | vector; apic_write(reg, v); @@ -815,7 +869,7 @@ void clear_local_APIC(void) u32 v; /* APIC hasn't been mapped yet */ - if (!x2apic && !apic_phys) + if (!x2apic_mode && !apic_phys) return; maxlvt = lapic_get_maxlvt(); @@ -1287,7 +1341,7 @@ void check_x2apic(void) { if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; + x2apic_preenabled = x2apic_mode = 1; } } @@ -1295,7 +1349,7 @@ void enable_x2apic(void) { int msr, msr2; - if (!x2apic) + if (!x2apic_mode) return; rdmsr(MSR_IA32_APICBASE, msr, msr2); @@ -1304,6 +1358,7 @@ void enable_x2apic(void) wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } +#endif /* CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) { @@ -1312,32 +1367,21 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; + ret = dmar_table_init(); + if (ret) { + pr_debug("dmar_table_init() failed with %d:\n", ret); + goto ir_failed; } - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; + if (!intr_remapping_supported()) { + pr_debug("intr-remapping not supported\n"); + goto ir_failed; } - ret = dmar_table_init(); - if (ret) { - pr_info("dmar_table_init() failed with %d:\n", ret); - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - pr_info("Not enabling x2apic,Intr-remapping\n"); + if (!x2apic_preenabled && skip_ioapic_setup) { + pr_info("Skipped enabling intr-remap because of skipping " + "io-apic setup\n"); return; } @@ -1357,19 +1401,16 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } - + ret = enable_intr_remapping(x2apic_supported()); if (ret) goto end_restore; - if (!x2apic) { - x2apic = 1; + pr_info("Enabled Interrupt-remapping\n"); + + if (x2apic_supported() && !x2apic_mode) { + x2apic_mode = 1; enable_x2apic(); + pr_info("Enabled x2apic\n"); } end_restore: @@ -1378,37 +1419,34 @@ end_restore: * IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - else - reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); end: - if (!ret) { - if (!x2apic_preenabled) - pr_info("Enabled x2apic and interrupt-remapping\n"); - else - pr_info("Enabled Interrupt-remapping\n"); - } else - pr_err("Failed to enable Interrupt-remapping and x2apic\n"); if (ioapic_entries) free_ioapic_entries(ioapic_entries); + + if (!ret) + return; + +ir_failed: + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else if (cpu_has_x2apic) + pr_info("Not enabling x2apic,Intr-remapping\n"); #else if (!cpu_has_x2apic) return; if (x2apic_preenabled) panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); + " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); #endif return; } -#endif /* CONFIG_X86_X2APIC */ + #ifdef CONFIG_X86_64 /* @@ -1425,7 +1463,6 @@ static int __init detect_init_APIC(void) } mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - boot_cpu_physical_apicid = 0; return 0; } #else @@ -1539,32 +1576,49 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { - if (x2apic) { + unsigned int new_apicid; + + if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); return; } - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ + /* If no local APIC can be found return early */ if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else + /* lets NOP'ify apic operations */ + pr_info("APIC: disable apic facility\n"); + apic_disable(); + } else { apic_phys = mp_lapic_addr; - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", - APIC_BASE, apic_phys); + /* + * acpi lapic path already maps that address in + * acpi_register_lapic_address() + */ + if (!acpi_lapic) + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + + apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", + APIC_BASE, apic_phys); + } /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); + new_apicid = read_apic_id(); + if (boot_cpu_physical_apicid != new_apicid) { + boot_cpu_physical_apicid = new_apicid; + /* + * yeah -- we lie about apic_version + * in case if apic was disabled via boot option + * but it's not a problem for SMP compiled kernel + * since smp_sanity_check is prepared for such a case + * and disable smp mode + */ + apic_version[new_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); + } } /* @@ -1733,8 +1787,7 @@ void __init connect_bsp_APIC(void) */ apic_printk(APIC_VERBOSE, "leaving PIC mode, " "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); + imcr_pic_to_apic(); } #endif if (apic->enable_apic_mode) @@ -1762,8 +1815,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) */ apic_printk(APIC_VERBOSE, "disabling APIC mode, " "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); + imcr_apic_to_pic(); return; } #endif @@ -1969,10 +2021,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); -#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) disable_intr_remapping(); -#endif + local_irq_restore(flags); return 0; } @@ -1982,42 +2034,34 @@ static int lapic_resume(struct sys_device *dev) unsigned int l, h; unsigned long flags; int maxlvt; - -#ifdef CONFIG_INTR_REMAP - int ret; + int ret = 0; struct IO_APIC_route_entry **ioapic_entries = NULL; if (!apic_pm_state.active) return 0; local_irq_save(flags); - if (x2apic) { + if (intr_remapping_enabled) { ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { WARN(1, "Alloc ioapic_entries in lapic resume failed."); - return -ENOMEM; + ret = -ENOMEM; + goto restore; } ret = save_IO_APIC_setup(ioapic_entries); if (ret) { WARN(1, "Saving IO-APIC state failed: %d\n", ret); free_ioapic_entries(ioapic_entries); - return ret; + goto restore; } mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - enable_x2apic(); } -#else - if (!apic_pm_state.active) - return 0; - local_irq_save(flags); - if (x2apic) + if (x2apic_mode) enable_x2apic(); -#endif - else { /* * Make sure the APICBASE points to the right address @@ -2055,21 +2099,16 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - reenable_intr_remapping(EIM_32BIT_APIC_ID); - - if (x2apic) { + if (intr_remapping_enabled) { + reenable_intr_remapping(x2apic_mode); unmask_8259A(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); } -#endif - +restore: local_irq_restore(flags); - - return 0; + return ret; } /* @@ -2117,31 +2156,14 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ #ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) + +static int __cpuinit apic_cluster_num(void) { int i, clusters, zeros; unsigned id; u16 *bios_cpu_apicid; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); @@ -2177,18 +2199,67 @@ __cpuinit int apic_is_clustered_box(void) ++zeros; } - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) + return clusters; +} + +static int __cpuinitdata multi_checked; +static int __cpuinitdata multi; + +static int __cpuinit set_multi(const struct dmi_system_id *d) +{ + if (multi) + return 0; + pr_info("APIC: %s detected, Multi Chassis\n", d->ident); + multi = 1; + return 0; +} + +static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { + { + .callback = set_multi, + .ident = "IBM System Summit2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), + }, + }, + {} +}; + +static void __cpuinit dmi_check_multi(void) +{ + if (multi_checked) + return; + + dmi_check_system(multi_dmi_table); + multi_checked = 1; +} + +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. + * Use DMI to check them + */ +__cpuinit int apic_is_clustered_box(void) +{ + dmi_check_multi(); + if (multi) return 1; + if (!is_vsmp_box()) + return 0; + /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. + * ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards */ - return (clusters > 2); + if (apic_cluster_num() > 1) + return 1; + + return 0; } #endif diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 306e5e88fb6f..d0c99abc26c3 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -161,7 +161,7 @@ static int flat_apic_id_registered(void) static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { - return hard_smp_processor_id() >> index_msb; + return initial_apic_id >> index_msb; } struct apic apic_flat = { @@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) * regardless of how many processors are present (x86_64 ES7000 * is an example). */ - if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 1c11b819f245..69328ac8de9c 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) +static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) { unsigned long vect = 0, psaival = 0; @@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr) } #ifdef CONFIG_ACPI -static int find_unisys_acpi_oem_table(unsigned long *oem_addr) +static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; struct es7000_oem_table *table; @@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr) return 0; } -static void unmap_unisys_acpi_oem_table(unsigned long oem_addr) +static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { if (!oem_addr) return; @@ -306,7 +306,7 @@ static int es7000_check_dsdt(void) static int es7000_acpi_ret; /* Hook from generic ACPI tables.c */ -static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { unsigned long oem_addr = 0; int check_dsdt; @@ -717,7 +717,7 @@ struct apic apic_es7000_cluster = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; -struct apic apic_es7000 = { +struct apic __refdata apic_es7000 = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 30da617d18e4..1946fac42ab3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -59,6 +59,7 @@ #include <asm/setup.h> #include <asm/irq_remapping.h> #include <asm/hpet.h> +#include <asm/hw_irq.h> #include <asm/uv/uv_hub.h> #include <asm/uv/uv_irq.h> @@ -129,12 +130,9 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +static struct irq_pin_list *get_one_free_irq_2_pin(int node) { struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); @@ -148,9 +146,6 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) return cfg; } -static struct irq_cfg *get_one_free_irq_cfg(int cpu) +static struct irq_cfg *get_one_free_irq_cfg(int node) { struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { @@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) return cfg; } -int arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int node) { struct irq_cfg *cfg; cfg = desc->chip_data; if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); + desc->chip_data = get_one_free_irq_cfg(node); if (!desc->chip_data) { printk(KERN_ERR "can not alloc irq_cfg\n"); BUG_ON(1); @@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) return 0; } -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - +/* for move_irq_desc */ static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) { struct irq_pin_list *old_entry, *head, *tail, *entry; @@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) if (!old_entry) return; - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) return; @@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) tail = entry; old_entry = old_entry->next; while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { entry = head; while (entry) { @@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) } void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { struct irq_cfg *cfg; struct irq_cfg *old_cfg; - cfg = get_one_free_irq_cfg(cpu); + cfg = get_one_free_irq_cfg(node); if (!cfg) return; @@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); - init_copy_irq_2_pin(old_cfg, cfg, cpu); + init_copy_irq_2_pin(old_cfg, cfg, node); } static void free_irq_cfg(struct irq_cfg *old_cfg) @@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->chip_data = NULL; } } - -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; - - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(desc->affinity, mask)) - cfg->move_desc_pending = 1; - } -} -#endif +/* end for move_irq_desc */ #else static struct irq_cfg *irq_cfg(unsigned int irq) @@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ -} -#endif - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -518,132 +490,18 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } -#ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - -/* - * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and - * leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned int irq; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; - - irq = desc->irq; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; - - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - - cpumask_copy(desc->affinity, mask); - - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - set_ioapic_affinity_irq_desc(desc, mask); -} -#endif /* CONFIG_SMP */ - /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list *entry; entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", apic, pin); @@ -663,7 +521,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); + entry->next = get_one_free_irq_2_pin(node); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -672,7 +530,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { @@ -692,7 +550,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + add_pin_to_irq_node(cfg, node, newapic, newpin); } static inline void io_apic_modify_irq(struct irq_cfg *cfg, @@ -850,7 +708,6 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_INTR_REMAP struct IO_APIC_route_entry **alloc_ioapic_entries(void) { int apic; @@ -948,20 +805,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries) - -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(ioapic_entries); -} - void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) { int apic; @@ -971,7 +814,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) kfree(ioapic_entries); } -#endif /* * Find the IRQ entry number of a certain pin. @@ -1032,54 +874,6 @@ static int __init find_isa_irq_apic(int irq, int type) return -1; } -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || - mp_irqs[i].dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - #if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR @@ -1298,6 +1092,64 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, + struct io_apic_irq_attr *irq_attr) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, + "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (test_bit(bus, mp_bus_not_pci)) { + apic_printk(APIC_VERBOSE, + "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].srcbusirq & 3)) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + return irq; + } + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) { + set_io_apic_irq_attr(irq_attr, apic, + mp_irqs[i].dstirq, + irq_trigger(i), + irq_polarity(i)); + best_guess = irq; + } + } + } + return best_guess; +} +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + void lock_vector_lock(void) { /* Used to the online set of cpus does not change @@ -1628,58 +1480,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq ioapic_write_entry(apic_id, pin, entry); } +static struct { + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} mp_ioapic_routing[MAX_IO_APICS]; + static void __init setup_IO_APIC_irqs(void) { - int apic_id, pin, idx, irq; + int apic_id = 0, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { - for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - - idx = find_irq_entry(apic_id, pin, mp_INT); - if (idx == -1) { - if (!notcon) { - notcon = 1; - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic_id].apicid, pin); - } else - apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic_id].apicid, pin); - continue; - } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + apic_id = mp_find_ioapic(0); + if (apic_id < 0) + apic_id = 0; + } +#endif - irq = pin_2_irq(idx, apic_id, pin); + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) { + if (!notcon) { + notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic_id].apicid, pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic_id].apicid, pin); + continue; + } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } - /* - * Skip the timer IRQ if there's a quirk handler - * installed and if it returns 1: - */ - if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) - continue; + irq = pin_2_irq(idx, apic_id, pin); - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - continue; - } - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) + continue; - setup_IO_APIC_irq(apic_id, pin, irq, desc, - irq_trigger(idx), irq_polarity(idx)); + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; } + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + /* + * don't mark it in pin_programmed, so later acpi could + * set it correctly when irq < 16 + */ + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); } if (notcon) @@ -1869,7 +1733,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) __apicdebuginit(void) print_local_APIC(void *dummy) { - unsigned int v, ver, maxlvt; + unsigned int i, v, ver, maxlvt; u64 icr; if (apic_verbosity == APIC_QUIET) @@ -1957,6 +1821,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); v = apic_read(APIC_TDCR); printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + v = apic_read(APIC_EFEAT); + maxlvt = (v >> 16) & 0xff; + printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); + v = apic_read(APIC_ECTRL); + printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); + for (i = 0; i < maxlvt; i++) { + v = apic_read(APIC_EILVTn(i)); + printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); + } + } printk("\n"); } @@ -2005,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void) __apicdebuginit(int) print_all_ICs(void) { print_PIC(); + + /* don't print out if apic is not there */ + if (!cpu_has_apic || disable_apic) + return 0; + print_all_local_APICs(); print_IO_APIC(); @@ -2360,6 +2241,118 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); + +/* + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned int irq; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; + + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return BAD_APICID; + + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); +} + +static int +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + int ret = -1; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + ret = 0; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return ret; +} + +static int +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + return set_ioapic_affinity_irq_desc(desc, mask); +} #ifdef CONFIG_INTR_REMAP @@ -2374,26 +2367,25 @@ static int ioapic_retrigger_irq(unsigned int irq) * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. */ -static void +static int migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; unsigned int dest; unsigned int irq; + int ret = -1; if (!cpumask_intersects(mask, cpu_online_mask)) - return; + return ret; irq = desc->irq; if (get_irte(irq, &irte)) - return; + return ret; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); + return ret; dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); @@ -2409,27 +2401,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) send_cleanup_vector(cfg); cpumask_copy(desc->affinity, mask); + + return 0; } /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - migrate_ioapic_irq_desc(desc, mask); + return migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, +static int set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + return set_ir_ioapic_affinity_irq_desc(desc, mask); } #else -static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { + return 0; } #endif @@ -2491,86 +2486,19 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain has not changed, but affinity did */ - me = smp_processor_id(); - if (cpumask_test_cpu(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); - } } #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#ifdef CONFIG_X86_X2APIC -static void ack_x2apic_level(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - ack_x2APIC_irq(); - eoi_ioapic_irq(desc); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - static void ack_apic_edge(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2634,9 +2562,6 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); - if (irq_remapped(irq)) - eoi_ioapic_irq(desc); - /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2683,22 +2608,50 @@ static void ack_apic_level(unsigned int irq) } #ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + + entry = cfg->irq_2_pin; + for (;;) { + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_eoi(apic, pin); + entry = entry->next; + } +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ir_ack_apic_edge(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_edge(irq); -#endif - return ack_apic_edge(irq); + ack_APIC_irq(); } static void ir_ack_apic_level(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_level(irq); -#endif - return ack_apic_level(irq); + struct irq_desc *desc = irq_to_desc(irq); + + ack_APIC_irq(); + eoi_ioapic_irq(desc); } #endif /* CONFIG_INTR_REMAP */ @@ -2903,7 +2856,7 @@ static inline void __init check_timer(void) { struct irq_desc *desc = irq_to_desc(0); struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2969,7 +2922,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq_node(cfg, node, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } else { /* for edge trigger, setup_IO_APIC_irq already @@ -3006,7 +2959,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); enable_8259A_irq(0); if (timer_irq_works()) { @@ -3218,14 +3171,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int irq_want) +unsigned int create_irq_nr(unsigned int irq_want, int node) { /* Allocate an unused irq */ unsigned int irq; unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; struct irq_desc *desc_new = NULL; irq = 0; @@ -3234,7 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want) spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - desc_new = irq_to_desc_alloc_cpu(new, cpu); + desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; @@ -3243,6 +3195,9 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; + + desc_new = move_irq_desc(desc_new, node); + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; @@ -3260,11 +3215,12 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { + int node = cpu_to_node(boot_cpu_id); unsigned int irq_want; int irq; irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) irq = -1; @@ -3366,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3375,7 +3331,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3387,13 +3343,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + + return 0; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void +static int ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -3402,11 +3360,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irte irte; if (get_irte(irq, &irte)) - return; + return -1; dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3423,6 +3381,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) */ if (cfg->move_in_progress) send_cleanup_vector(cfg); + + return 0; } #endif @@ -3518,15 +3478,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) unsigned int irq_want; struct intel_iommu *iommu = NULL; int index = 0; + int node; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) return -1; irq_want = irq + 1; @@ -3576,7 +3538,7 @@ void arch_teardown_msi_irq(unsigned int irq) #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3585,7 +3547,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3597,6 +3559,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3630,7 +3594,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3639,7 +3603,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3651,6 +3615,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3707,7 +3673,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3715,11 +3681,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; target_ht_irq(irq, dest, cfg->vector); + + return 0; } #endif @@ -3794,6 +3762,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, eligible_cpu); @@ -3807,15 +3777,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -3833,10 +3801,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) struct uv_IO_APIC_route_entry *entry; int mmr_pnode; + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - entry->mask = 1; mmr_pnode = uv_blade_to_pnode(mmr_blade); @@ -3900,6 +3868,71 @@ int __init arch_probe_nr_irqs(void) } #endif +static int __io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + struct irq_desc *desc; + struct irq_cfg *cfg; + int node; + int ioapic, pin; + int trigger, polarity; + + ioapic = irq_attr->ioapic; + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + if (dev) + node = dev_to_node(dev); + else + node = cpu_to_node(boot_cpu_id); + + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + + pin = irq_attr->ioapic_pin; + trigger = irq_attr->trigger; + polarity = irq_attr->polarity; + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= NR_IRQS_LEGACY) { + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, ioapic, pin); + } + + setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); + + return 0; +} + +int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) +{ + int ioapic, pin; + /* + * Avoid pin reprogramming. PRTs typically include entries + * with redundant pin->gsi mappings (but unique PCI devices); + * we only program the IOAPIC on the first. + */ + ioapic = irq_attr->ioapic; + pin = irq_attr->ioapic_pin; + if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[ioapic].apicid, pin); + return 0; + } + set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); + + return __io_apic_set_pci_routing(dev, irq, irq_attr); +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3980,6 +4013,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } +#endif int __init io_apic_get_version(int ioapic) { @@ -3992,39 +4026,6 @@ int __init io_apic_get_version(int ioapic) return reg_01.bits.version; } -#endif - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - struct irq_desc *desc; - struct irq_cfg *cfg; - int cpu = boot_cpu_id; - - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - desc = irq_to_desc_alloc_cpu(irq, cpu); - if (!desc) { - printk(KERN_INFO "can not get irq_desc %d\n", irq); - return 0; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= NR_IRQS_LEGACY) { - cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); - } - - setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); - - return 0; -} - int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) { @@ -4055,51 +4056,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic, irq, irq_entry; + int pin, ioapic = 0, irq, irq_entry; struct irq_desc *desc; - struct irq_cfg *cfg; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, desc, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); - continue; +#ifdef CONFIG_ACPI + if (!acpi_disabled && acpi_ioapic) { + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + ioapic = 0; + } +#endif - } + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); - /* - * Honour affinities which have been set in early boot - */ - if (desc->status & - (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; - else - mask = apic->target_cpus(); + desc = irq_to_desc(irq); - if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq_desc(desc, mask); - else - set_ioapic_affinity_irq_desc(desc, mask); - } + /* + * Honour affinities which have been set in early boot + */ + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = apic->target_cpus(); + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq_desc(desc, mask); + else + set_ioapic_affinity_irq_desc(desc, mask); } + } #endif diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index ce4fbfa315a1..a691302dc3ff 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data) } #endif -static void report_broken_nmi(int cpu, int *prev_nmi_count) +static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) { printk(KERN_CONT "\n"); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 01eda2ac65e4..440a8bccd91a 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -160,7 +160,6 @@ extern struct apic apic_summit; extern struct apic apic_bigsmp; extern struct apic apic_es7000; extern struct apic apic_es7000_cluster; -extern struct apic apic_default; struct apic *apic = &apic_default; EXPORT_SYMBOL_GPL(apic); diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1783652bb0e5..bc3e880f9b82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic && (apic != &apic_x2apic_phys && + if (x2apic_mode && (apic != &apic_x2apic_phys && #ifdef CONFIG_X86_UV apic != &apic_x2apic_uv_x && #endif diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 9cfe1f415d81..344eee4ac0a4 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) static const struct cpumask *summit_target_cpus(void) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4a903e2f0d17..8e4cbb255c38 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -10,7 +10,7 @@ #include <asm/apic.h> #include <asm/ipi.h> -DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2bda69352976..ef0ae207a7c8 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -105,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP unsigned long val; @@ -562,7 +562,7 @@ void __init uv_system_init(void) union uvh_node_id_u node_id; unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; - int max_pnode = 0; + int gnode_extra, max_pnode = 0; unsigned long mmr_base, present, paddr; unsigned short pnode_mask; @@ -574,6 +574,13 @@ void __init uv_system_init(void) mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; + pnode_mask = (1 << n_val) - 1; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; + gnode_upper = ((unsigned long)gnode_extra << m_val); + printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", + n_val, m_val, gnode_upper, gnode_extra); + printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) @@ -583,15 +590,18 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_blade_info); get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_node_to_blade); memset(uv_node_to_blade, 255, bytes); bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); + BUG_ON(!uv_cpu_to_blade); memset(uv_cpu_to_blade, 255, bytes); blade = 0; @@ -607,11 +617,6 @@ void __init uv_system_init(void) } } - pnode_mask = (1 << n_val) - 1; - node_id.v = uv_read_local_mmr(UVH_NODE_ID); - gnode_upper = (((unsigned long)node_id.s.node_id) & - ~((1 << n_val) - 1)) << m_val; - uv_bios_init(); uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, &sn_region_size); @@ -634,6 +639,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; + uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 5a6aa1c1162f..1a830cbd7015 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -146,4 +146,5 @@ void foo(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72f062fb4b5..898ecc47e129 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -125,6 +125,7 @@ int main(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); BLANK(); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e4a459daa64..e5b27d8f1b47 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -6,6 +6,7 @@ #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> +#include <asm/pci-direct.h> #ifdef CONFIG_X86_64 # include <asm/numa_64.h> @@ -272,7 +273,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; - unsigned apicid = hard_smp_processor_id(); + unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) @@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) (c->x86_model == 8 && c->x86_mask >= 8)) set_cpu_cap(c, X86_FEATURE_K6_MTRR); #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) + /* check CPU config space for extended APIC ID */ + if (c->x86 >= 0xf) { + unsigned int val; + val = read_pci_config(0, 24, 0, 0x68); + if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) + set_cpu_cap(c, X86_FEATURE_EXTD_APICID); + } +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1caefc82e62..b0517aa2bd3b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -114,6 +114,13 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int __init x86_xsave_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + return 1; +} +__setup("noxsave", x86_xsave_setup); + #ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -292,7 +299,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; +__u32 cpu_caps_set[NCAPINTS] __cpuinitdata; void load_percpu_segment(int cpu) { @@ -761,6 +769,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); + /* Clear/Set all flags overriden by options, after probe */ + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } + #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif @@ -806,6 +820,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #endif init_hypervisor(c); + + /* + * Clear/Set all flags overriden by options, need do it + * before following smp all cpus cap AND. + */ + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -818,10 +842,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - #ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 46e29ab96c6a..6b2a52dd0403 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -32,9 +32,7 @@ static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); -static DEFINE_PER_CPU(unsigned, cpu_modelflag); static DEFINE_PER_CPU(int, cpu_priv_count); -static DEFINE_PER_CPU(unsigned, cpu_model); static DEFINE_MUTEX(cpu_debug_lock); @@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = { { "value", CPU_REG_ALL, 1 }, }; -/* Intel Registers Range */ -static struct cpu_debug_range cpu_intel_range[] = { - { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, - { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, - { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, - { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, - { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, - { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, - - { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, - { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, - { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, - { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, - - { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, - { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, - { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, - { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, - - { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, - { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, - { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, - { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, - - { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, - { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, - { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, - { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, - - { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, - { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, - { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, - { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, - { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, - - { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, - { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, - { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, - { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, - { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, - { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, - { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, - { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, - - { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, - { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, - { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, - { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, - { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, - { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, - - { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, - { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, - { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, - - { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, - { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, - { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, - { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, - { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, - { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, - { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, - { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, - { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, - - { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, - { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, - { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, - { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, - { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, - { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, - { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, - { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, - - { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, - { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, - { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, - - { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, - { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, - { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, - { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, +/* CPU Registers Range */ +static struct cpu_debug_range cpu_reg_range[] = { + { 0x00000000, 0x00000001, CPU_MC, }, + { 0x00000006, 0x00000007, CPU_MONITOR, }, + { 0x00000010, 0x00000010, CPU_TIME, }, + { 0x00000011, 0x00000013, CPU_PMC, }, + { 0x00000017, 0x00000017, CPU_PLATFORM, }, + { 0x0000001B, 0x0000001B, CPU_APIC, }, + { 0x0000002A, 0x0000002B, CPU_POWERON, }, + { 0x0000002C, 0x0000002C, CPU_FREQ, }, + { 0x0000003A, 0x0000003A, CPU_CONTROL, }, + { 0x00000040, 0x00000047, CPU_LBRANCH, }, + { 0x00000060, 0x00000067, CPU_LBRANCH, }, + { 0x00000079, 0x00000079, CPU_BIOS, }, + { 0x00000088, 0x0000008A, CPU_CACHE, }, + { 0x0000008B, 0x0000008B, CPU_BIOS, }, + { 0x0000009B, 0x0000009B, CPU_MONITOR, }, + { 0x000000C1, 0x000000C4, CPU_PMC, }, + { 0x000000CD, 0x000000CD, CPU_FREQ, }, + { 0x000000E7, 0x000000E8, CPU_PERF, }, + { 0x000000FE, 0x000000FE, CPU_MTRR, }, + + { 0x00000116, 0x0000011E, CPU_CACHE, }, + { 0x00000174, 0x00000176, CPU_SYSENTER, }, + { 0x00000179, 0x0000017B, CPU_MC, }, + { 0x00000186, 0x00000189, CPU_PMC, }, + { 0x00000198, 0x00000199, CPU_PERF, }, + { 0x0000019A, 0x0000019A, CPU_TIME, }, + { 0x0000019B, 0x0000019D, CPU_THERM, }, + { 0x000001A0, 0x000001A0, CPU_MISC, }, + { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, + { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, + { 0x000001D9, 0x000001D9, CPU_DEBUG, }, + { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, + + { 0x00000200, 0x0000020F, CPU_MTRR, }, + { 0x00000250, 0x00000250, CPU_MTRR, }, + { 0x00000258, 0x00000259, CPU_MTRR, }, + { 0x00000268, 0x0000026F, CPU_MTRR, }, + { 0x00000277, 0x00000277, CPU_PAT, }, + { 0x000002FF, 0x000002FF, CPU_MTRR, }, + + { 0x00000300, 0x00000311, CPU_PMC, }, + { 0x00000345, 0x00000345, CPU_PMC, }, + { 0x00000360, 0x00000371, CPU_PMC, }, + { 0x0000038D, 0x00000390, CPU_PMC, }, + { 0x000003A0, 0x000003BE, CPU_PMC, }, + { 0x000003C0, 0x000003CD, CPU_PMC, }, + { 0x000003E0, 0x000003E1, CPU_PMC, }, + { 0x000003F0, 0x000003F2, CPU_PMC, }, + + { 0x00000400, 0x00000417, CPU_MC, }, + { 0x00000480, 0x0000048B, CPU_VMX, }, + + { 0x00000600, 0x00000600, CPU_DEBUG, }, + { 0x00000680, 0x0000068F, CPU_LBRANCH, }, + { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, + + { 0x000107CC, 0x000107D3, CPU_PMC, }, + + { 0xC0000080, 0xC0000080, CPU_FEATURES, }, + { 0xC0000081, 0xC0000084, CPU_CALL, }, + { 0xC0000100, 0xC0000102, CPU_BASE, }, + { 0xC0000103, 0xC0000103, CPU_TIME, }, + + { 0xC0010000, 0xC0010007, CPU_PMC, }, + { 0xC0010010, 0xC0010010, CPU_CONF, }, + { 0xC0010015, 0xC0010015, CPU_CONF, }, + { 0xC0010016, 0xC001001A, CPU_MTRR, }, + { 0xC001001D, 0xC001001D, CPU_MTRR, }, + { 0xC001001F, 0xC001001F, CPU_CONF, }, + { 0xC0010030, 0xC0010035, CPU_BIOS, }, + { 0xC0010044, 0xC0010048, CPU_MC, }, + { 0xC0010050, 0xC0010056, CPU_SMM, }, + { 0xC0010058, 0xC0010058, CPU_CONF, }, + { 0xC0010060, 0xC0010060, CPU_CACHE, }, + { 0xC0010061, 0xC0010068, CPU_SMM, }, + { 0xC0010069, 0xC001006B, CPU_SMM, }, + { 0xC0010070, 0xC0010071, CPU_SMM, }, + { 0xC0010111, 0xC0010113, CPU_SMM, }, + { 0xC0010114, 0xC0010118, CPU_SVM, }, + { 0xC0010140, 0xC0010141, CPU_OSVM, }, + { 0xC0011022, 0xC0011023, CPU_CONF, }, }; -/* AMD Registers Range */ -static struct cpu_debug_range cpu_amd_range[] = { - { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, }, - { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, }, - { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, }, - { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS }, - { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS }, - { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, }, - - { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, }, - { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, }, - { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, }, - { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, }, - - { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, }, - { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, }, - { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, }, - - { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, }, - - { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, }, - { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, }, - { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, }, - { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, }, - - { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, }, - { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, }, - { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, }, - { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, }, - { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, }, - { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, }, - { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, }, - { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, }, - { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, }, - { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, }, - { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, }, - { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, }, - { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, }, - { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, }, - { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, }, - { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, }, - { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, }, - { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, -}; - - -/* Intel */ -static int get_intel_modelflag(unsigned model) -{ - int flag; - - switch (model) { - case 0x0501: - case 0x0502: - case 0x0504: - flag = CPU_INTEL_PENTIUM; - break; - case 0x0601: - case 0x0603: - case 0x0605: - case 0x0607: - case 0x0608: - case 0x060A: - case 0x060B: - flag = CPU_INTEL_P6; - break; - case 0x0609: - case 0x060D: - flag = CPU_INTEL_PENTIUM_M; - break; - case 0x060E: - flag = CPU_INTEL_CORE; - break; - case 0x060F: - case 0x0617: - flag = CPU_INTEL_CORE2; - break; - case 0x061C: - flag = CPU_INTEL_ATOM; - break; - case 0x0F00: - case 0x0F01: - case 0x0F02: - case 0x0F03: - case 0x0F04: - flag = CPU_INTEL_XEON_P4; - break; - case 0x0F06: - flag = CPU_INTEL_XEON_MP; - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -/* AMD */ -static int get_amd_modelflag(unsigned model) -{ - int flag; - - switch (model >> 8) { - case 0x6: - flag = CPU_AMD_K6; - break; - case 0x7: - flag = CPU_AMD_K7; - break; - case 0x8: - flag = CPU_AMD_K8; - break; - case 0xf: - flag = CPU_AMD_0F; - break; - case 0x10: - flag = CPU_AMD_10; - break; - case 0x11: - flag = CPU_AMD_11; - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -static int get_cpu_modelflag(unsigned cpu) -{ - int flag; - - flag = per_cpu(cpu_model, cpu); - - switch (flag >> 16) { - case X86_VENDOR_INTEL: - flag = get_intel_modelflag(flag); - break; - case X86_VENDOR_AMD: - flag = get_amd_modelflag(flag & 0xffff); - break; - default: - flag = CPU_NONE; - break; - } - - return flag; -} - -static int get_cpu_range_count(unsigned cpu) -{ - int index; - - switch (per_cpu(cpu_model, cpu) >> 16) { - case X86_VENDOR_INTEL: - index = ARRAY_SIZE(cpu_intel_range); - break; - case X86_VENDOR_AMD: - index = ARRAY_SIZE(cpu_amd_range); - break; - default: - index = 0; - break; - } - - return index; -} - static int is_typeflag_valid(unsigned cpu, unsigned flag) { - unsigned vendor, modelflag; - int i, index; + int i; /* Standard Registers should be always valid */ if (flag >= CPU_TSS) return 1; - modelflag = per_cpu(cpu_modelflag, cpu); - vendor = per_cpu(cpu_model, cpu) >> 16; - index = get_cpu_range_count(cpu); - - for (i = 0; i < index; i++) { - switch (vendor) { - case X86_VENDOR_INTEL: - if ((cpu_intel_range[i].model & modelflag) && - (cpu_intel_range[i].flag & flag)) - return 1; - break; - case X86_VENDOR_AMD: - if ((cpu_amd_range[i].model & modelflag) && - (cpu_amd_range[i].flag & flag)) - return 1; - break; - } + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { + if (cpu_reg_range[i].flag == flag) + return 1; } /* Invalid */ @@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag) static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, int index, unsigned flag) { - unsigned modelflag; - - modelflag = per_cpu(cpu_modelflag, cpu); - *max = 0; - switch (per_cpu(cpu_model, cpu) >> 16) { - case X86_VENDOR_INTEL: - if ((cpu_intel_range[index].model & modelflag) && - (cpu_intel_range[index].flag & flag)) { - *min = cpu_intel_range[index].min; - *max = cpu_intel_range[index].max; - } - break; - case X86_VENDOR_AMD: - if ((cpu_amd_range[index].model & modelflag) && - (cpu_amd_range[index].flag & flag)) { - *min = cpu_amd_range[index].min; - *max = cpu_amd_range[index].max; - } - break; - } + if (cpu_reg_range[index].flag == flag) { + *min = cpu_reg_range[index].min; + *max = cpu_reg_range[index].max; + } else + *max = 0; return *max; } @@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) unsigned msr, msr_min, msr_max; struct cpu_private *priv; u32 low, high; - int i, range; + int i; if (seq) { priv = seq->private; @@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) } } - range = get_cpu_range_count(cpu); - - for (i = 0; i < range; i++) { + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) continue; @@ -588,8 +369,20 @@ static void print_apic(void *arg) seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); -#endif /* CONFIG_X86_LOCAL_APIC */ + if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { + unsigned int i, v, maxeilvt; + + v = apic_read(APIC_EFEAT); + maxeilvt = (v >> 16) & 0xff; + seq_printf(seq, " EFEAT\t\t: %08x\n", v); + seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); + for (i = 0; i < maxeilvt; i++) { + v = apic_read(APIC_EILVTn(i)); + seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); + } + } +#endif /* CONFIG_X86_LOCAL_APIC */ seq_printf(seq, "\n MSR\t:\n"); } @@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) { struct dentry *cpu_dentry = NULL; unsigned reg, reg_min, reg_max; - int i, range, err = 0; + int i, err = 0; char reg_dir[12]; u32 low, high; - range = get_cpu_range_count(cpu); - - for (i = 0; i < range; i++) { + for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { if (!get_cpu_range(cpu, ®_min, ®_max, i, cpu_base[type].flag)) continue; @@ -850,10 +641,6 @@ static int cpu_init_cpu(void) cpui = &cpu_data(cpu); if (!cpu_has(cpui, X86_FEATURE_MSR)) continue; - per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | - (cpui->x86 << 8) | - (cpui->x86_model)); - per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); sprintf(cpu_dir, "cpu%d", cpu); cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index 52c839875478..f138c6c389b9 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -220,11 +220,14 @@ config X86_LONGHAUL If in doubt, say N. config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver" + tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" select CPU_FREQ_TABLE - depends on X86_32 + depends on X86_32 && EXPERIMENTAL help - This adds the CPUFreq driver for VIA C7 processors. + This adds the CPUFreq driver for VIA C7 processors. However, this driver + does not have any safeguards to prevent operating the CPU out of spec + and is thus considered dangerous. Please use the regular ACPI cpufreq + driver, enabled by CONFIG_X86_ACPI_CPUFREQ. If in doubt, say N. diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 208ecf6643df..ae9b503220ca 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid) { struct cpuinfo_x86 *cpu = &cpu_data(cpuid); - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; + return cpu_has(cpu, X86_FEATURE_EST); } static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) @@ -550,7 +546,7 @@ static int __init acpi_cpufreq_early_init(void) return -ENOMEM; } for_each_possible_cpu(i) { - if (!alloc_cpumask_var_node( + if (!zalloc_cpumask_var_node( &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, GFP_KERNEL, cpu_to_node(i))) { @@ -693,8 +689,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE && policy->cpuinfo.transition_latency > 20 * 1000) { policy->cpuinfo.transition_latency = 20 * 1000; - printk_once(KERN_INFO "Capping off P-state tranision" - " latency at 20 uS\n"); + printk_once(KERN_INFO + "P-state transition latency capped at 20 uS\n"); } /* table init */ diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 6ac55bd341ae..869615193720 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -168,6 +168,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) case 0x0E: /* Core */ case 0x0F: /* Core Duo */ case 0x16: /* Celeron Core */ + case 0x1C: /* Atom */ p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE); case 0x0D: /* Pentium M (Dothan) */ diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 3c28ccd49742..d47c775eb0ab 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -168,10 +168,12 @@ static int check_powernow(void) return 1; } +#ifdef CONFIG_X86_POWERNOW_K7_ACPI static void invalidate_entry(unsigned int entry) { powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; } +#endif static int get_ranges(unsigned char *pst) { @@ -320,7 +322,7 @@ static int powernow_acpi_init(void) goto err0; } - if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, + if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, GFP_KERNEL)) { retval = -ENOMEM; goto err05; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4709ead2db52..cf52215d9eb1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -649,6 +649,20 @@ static void print_basics(struct powernow_k8_data *data) data->batps); } +static u32 freq_from_fid_did(u32 fid, u32 did) +{ + u32 mhz = 0; + + if (boot_cpu_data.x86 == 0x10) + mhz = (100 * (fid + 0x10)) >> did; + else if (boot_cpu_data.x86 == 0x11) + mhz = (100 * (fid + 8)) >> did; + else + BUG(); + + return mhz * 1000; +} + static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) { @@ -821,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; int ret_val = -ENODEV; - acpi_integer space_id; + acpi_integer control, status; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -834,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) goto err_out; } - space_id = data->acpi_data.control_register.space_id; - if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + control = data->acpi_data.control_register.space_id; + status = data->acpi_data.status_register.space_id; + + if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) { dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - space_id); + control, status); goto err_out; } @@ -872,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); - if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { printk(KERN_ERR PFX "unable to alloc powernow_k8_data cpumask\n"); ret_val = -ENOMEM; @@ -923,8 +938,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, powernow_table[i].index = index; - powernow_table[i].frequency = - data->acpi_data.states[i].core_frequency * 1000; + /* Frequency may be rounded for these */ + if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { + powernow_table[i].frequency = + freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); + } else + powernow_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; } return 0; } @@ -1215,13 +1235,16 @@ static int powernowk8_verify(struct cpufreq_policy *pol) return cpufreq_frequency_table_verify(pol, data->powernow_table); } +static const char ACPI_PSS_BIOS_BUG_MSG[] = + KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" + KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + /* per CPU init entry point to the driver */ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask; int rc; - static int print_once; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1244,19 +1267,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - /* - * Replace this one with print_once as soon as such a - * thing gets introduced - */ - if (!print_once) { - WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS " - "does not provide ACPI _PSS objects " - "in a way that Linux understands. " - "Please report this to the Linux ACPI" - " maintainers and complain to your " - "BIOS vendor.\n"); - print_once++; - } + printk_once(ACPI_PSS_BIOS_BUG_MSG); goto err_out; } if (pol->cpu != 0) { diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index c9f1fdc02830..55c831ed71ce 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy, if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) return -ENOMEM; - if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { + if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { free_cpumask_var(saved_mask); return -ENOMEM; } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7437fa133c02..daed39ba2614 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) } #endif -static void __cpuinit srat_detect_node(void) +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); + int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; /* Don't do the funky fallback heuristics the AMD version employs for now. */ @@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } /* Work around errata */ - srat_detect_node(); + srat_detect_node(c); if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 483eda96e102..789efe217e1a 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/smp.h> +#include <asm/k8.h> #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -159,14 +160,6 @@ struct _cpuid4_info_regs { unsigned long can_disable; }; -#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) -static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} -}; -#endif - unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -207,10 +200,17 @@ union l3_cache { }; static const unsigned short __cpuinitconst assocs[] = { - [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, [0xa] = 32, [0xb] = 48, + [1] = 1, + [2] = 2, + [4] = 4, + [6] = 8, + [8] = 16, + [0xa] = 32, + [0xb] = 48, [0xc] = 64, - [0xf] = 0xffff // ?? + [0xd] = 96, + [0xe] = 128, + [0xf] = 0xffff /* fully associative - no way to show this currently */ }; static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; @@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; if (leaf == 3) - eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + eax->split.num_threads_sharing = + current_cpu_data.x86_max_cores - 1; else eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; @@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; + + if (boot_cpu_data.x86 == 0x11) + return; + + /* see erratum #382 */ + if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) + return; + this_leaf->can_disable = 1; } @@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) -#ifdef CONFIG_PCI -static struct pci_dev *get_k8_northbridge(int node) -{ - struct pci_dev *dev = NULL; - int i; - - for (i = 0; i <= node; i++) { - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(&k8_nb_id[0], dev)); - if (!dev) - break; - } - return dev; -} -#else -static struct pci_dev *get_k8_northbridge(int node) -{ - return NULL; -} -#endif - -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, + unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - ssize_t ret = 0; - int i; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned int reg = 0; if (!this_leaf->can_disable) - return sprintf(buf, "Feature not enabled\n"); - - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; - } - for (i = 0; i < 2; i++) { - unsigned int reg; + if (!dev) + return -EINVAL; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); + pci_read_config_dword(dev, 0x1BC + index * 4, ®); + return sprintf(buf, "%x\n", reg); +} - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", - buf, (reg & 0x30000) >> 16, reg & 0xfff); - } - return ret; +#define SHOW_CACHE_DISABLE(index) \ +static ssize_t \ +show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return show_cache_disable(this_leaf, buf, index); \ } +SHOW_CACHE_DISABLE(0) +SHOW_CACHE_DISABLE(1) -static ssize_t -store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, - size_t count) +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, + const char *buf, size_t count, unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - unsigned int ret, index, val; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned long val = 0; + unsigned int scrubber = 0; if (!this_leaf->can_disable) - return 0; - - if (strlen(buf) > 15) return -EINVAL; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!dev) return -EINVAL; - if (index > 1) + + if (strict_strtoul(buf, 10, &val) < 0) return -EINVAL; val |= 0xc0000000; - dev = get_k8_northbridge(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); - return -EINVAL; - } + + pci_read_config_dword(dev, 0x58, &scrubber); + scrubber &= ~0x1f000000; + pci_write_config_dword(dev, 0x58, scrubber); pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); + return count; +} - return 1; +#define STORE_CACHE_DISABLE(index) \ +static ssize_t \ +store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ + const char *buf, size_t count) \ +{ \ + return store_cache_disable(this_leaf, buf, count, index); \ } +STORE_CACHE_DISABLE(0) +STORE_CACHE_DISABLE(1) struct _cache_attr { struct attribute attr; @@ -808,7 +795,10 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); -static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); +static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, + show_cache_disable_0, store_cache_disable_0); +static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, + show_cache_disable_1, store_cache_disable_1); static struct attribute * default_attrs[] = { &type.attr, @@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, - &cache_disable.attr, + &cache_disable_0.attr, + &cache_disable_1.attr, NULL }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 6fb0b359d2a5..09dd1d414fc3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -1163,7 +1163,7 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; - alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); + zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); err = mce_init_banks(); if (err) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index cef3ee30744b..65a0fceedcd7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -15,7 +15,6 @@ #include <asm/hw_irq.h> #include <asm/idle.h> #include <asm/therm_throt.h> -#include <asm/apic.h> asmlinkage void smp_thermal_interrupt(void) { diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index ce0fe4b5c04f..1d584a18a50d 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits) if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) return 0; - rdmsr(MTRRdefType_MSR, def, dummy); + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; @@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) */ if (!is_cpu(INTEL) || disable_mtrr_trim) return 0; - rdmsr(MTRRdefType_MSR, def, dummy); + rdmsr(MSR_MTRRdefType, def, dummy); def &= 0xff; if (def != MTRR_TYPE_UNCACHABLE) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0b776c09aff3..0543f69f0b27 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -20,9 +20,9 @@ struct fixed_range_block { }; static struct fixed_range_block fixed_range_blocks[] = { - { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ - { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ - { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ + { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ + { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ + { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ {} }; @@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs) k8_check_syscfg_dram_mod_en(); - rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]); for (i = 0; i < 2; i++) - rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); + rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]); for (i = 0; i < 8; i++) - rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); + rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]); } void mtrr_save_fixed_ranges(void *info) @@ -275,7 +275,11 @@ static void __init print_mtrr_state(void) } printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", mtrr_state.enabled & 2 ? "en" : "dis"); - high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + if (size_or_mask & 0xffffffffUL) + high_width = ffs(size_or_mask & 0xffffffffUL) - 1; + else + high_width = ffs(size_or_mask>>32) + 32 - 1; + high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", @@ -306,7 +310,7 @@ void __init get_mtrr_state(void) vrs = mtrr_state.var_ranges; - rdmsr(MTRRcap_MSR, lo, dummy); + rdmsr(MSR_MTRRcap, lo, dummy); mtrr_state.have_fixed = (lo >> 8) & 1; for (i = 0; i < num_var_ranges; i++) @@ -314,7 +318,7 @@ void __init get_mtrr_state(void) if (mtrr_state.have_fixed) get_fixed_ranges(mtrr_state.fixed_ranges); - rdmsr(MTRRdefType_MSR, lo, dummy); + rdmsr(MSR_MTRRdefType, lo, dummy); mtrr_state.def_type = (lo & 0xff); mtrr_state.enabled = (lo & 0xc00) >> 10; @@ -579,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) __flush_tlb(); /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); } static void post_set(void) __releases(set_atomicity_lock) @@ -591,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock) __flush_tlb(); /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); @@ -703,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i static int generic_have_wrcomb(void) { unsigned long config, dummy; - rdmsr(MTRRcap_MSR, config, dummy); + rdmsr(MSR_MTRRcap, config, dummy); return (config & (1 << 10)); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 03cda01f57c7..8fc248b5aeaf 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void) unsigned long config = 0, dummy; if (use_intel()) { - rdmsr(MTRRcap_MSR, config, dummy); + rdmsr(MSR_MTRRcap, config, dummy); } else if (is_cpu(AMD)) config = 2; else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 77f67f7b347a..7538b767f206 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -5,21 +5,6 @@ #include <linux/types.h> #include <linux/stddef.h> -#define MTRRcap_MSR 0x0fe -#define MTRRdefType_MSR 0x2ff - -#define MTRRfix64K_00000_MSR 0x250 -#define MTRRfix16K_80000_MSR 0x258 -#define MTRRfix16K_A0000_MSR 0x259 -#define MTRRfix4K_C0000_MSR 0x268 -#define MTRRfix4K_C8000_MSR 0x269 -#define MTRRfix4K_D0000_MSR 0x26a -#define MTRRfix4K_D8000_MSR 0x26b -#define MTRRfix4K_E0000_MSR 0x26c -#define MTRRfix4K_E8000_MSR 0x26d -#define MTRRfix4K_F0000_MSR 0x26e -#define MTRRfix4K_F8000_MSR 0x26f - #define MTRR_CHANGE_MASK_FIXED 0x01 #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 7f7e2753685b..1f5fb1588d1f 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) if (use_intel()) /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); else /* Cyrix ARRs - everything else were excluded at the top */ ctxt->ccr3 = getCx86(CX86_CCR3); @@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { if (use_intel()) /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); else if (is_cpu(CYRIX)) /* Cyrix ARRs - everything else were excluded at the top */ @@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt) /* Restore MTRRdefType */ if (use_intel()) /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); else /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ctxt->ccr3); diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index da87590b8698..81086c227ab7 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl); extern unsigned int code_bytes; -extern int kstack_depth_to_print; /* The form of the top of the frame on the stack */ struct stack_frame { diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 006281302925..7271fa33d791 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, */ __init void e820_setup_gap(void) { - unsigned long gapstart, gapsize, round; + unsigned long gapstart, gapsize; int found; gapstart = 0x10000000; @@ -635,14 +635,9 @@ __init void e820_setup_gap(void) #endif /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. + * e820_reserve_resources_late protect stolen RAM already */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; + pci_mem_start = gapstart; printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", @@ -1371,6 +1366,23 @@ void __init e820_reserve_resources(void) } } +/* How much should we pad RAM ending depending on where it is? */ +static unsigned long ram_alignment(resource_size_t pos) +{ + unsigned long mb = pos >> 20; + + /* To 64kB in the first megabyte */ + if (!mb) + return 64*1024; + + /* To 1MB in the first 16MB */ + if (mb < 16) + return 1024*1024; + + /* To 32MB for anything above that */ + return 32*1024*1024; +} + void __init e820_reserve_resources_late(void) { int i; @@ -1382,6 +1394,24 @@ void __init e820_reserve_resources_late(void) insert_resource_expand_to_fit(&iomem_resource, res); res++; } + + /* + * Try to bump up RAM regions to reasonable boundaries to + * avoid stolen RAM: + */ + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *entry = &e820_saved.map[i]; + resource_size_t start, end; + + if (entry->type != E820_RAM) + continue; + start = entry->addr + entry->size; + end = round_up(start, ram_alignment(start)); + if (start == end) + continue; + reserve_region_with_split(&iomem_resource, start, + end - 1, "RAM buffer"); + } } char *__init default_machine_specific_memory_setup(void) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 76b8cd953dee..ebdb85cf2686 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func) } #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) static u32 __init ati_ixp4x0_rev(int num, int slot, int func) { u32 d; @@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) d &= 0xff; return d; } +#endif static void __init ati_bugs(int num, int slot, int func) { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 987f91f0f755..1c17d7c751a4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1366,6 +1366,11 @@ END(xen_failsafe_callback) paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK paranoiderrorentry stack_segment do_stack_segment +#ifdef CONFIG_XEN +zeroentry xen_debug do_debug +zeroentry xen_int3 do_int3 +errorentry xen_stack_segment do_stack_segment +#endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault #ifdef CONFIG_X86_MCE diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 18dfa30795c9..b79c5533c421 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -442,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [old] "=r" (old), [faulted] "=r" (faulted) + : [old] "=&r" (old), [faulted] "=r" (faulted) : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30683883e0cd..dc5ed4bdd88d 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -608,13 +608,6 @@ ignore_int: ENTRY(initial_code) .long i386_start_kernel -.section .text -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - /* * BSS section */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c3fe010d74c8..9a391bbb8ba8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include <asm/io_apic.h> #include <asm/irq.h> #include <asm/idle.h> +#include <asm/hw_irq.h> atomic_t irq_err_count; @@ -24,9 +25,9 @@ void (*generic_interrupt_extension)(void) = NULL; */ void ack_bad_irq(unsigned int irq) { - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + if (printk_ratelimit()) + pr_err("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N @@ -36,9 +37,7 @@ void ack_bad_irq(unsigned int irq) * completely. * But only ack when the APIC is enabled -AK */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) @@ -178,7 +177,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_thermal_count; # ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; -#endif +# endif #endif return sum; } @@ -213,14 +212,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) irq = __get_cpu_var(vector_irq)[vector]; if (!handle_irq(irq, regs)) { -#ifdef CONFIG_X86_64 - if (!disable_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", - __func__, smp_processor_id(), vector, irq); + pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); } irq_exit(); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c index 368b0a8836f9..2e08b10ad51a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit.c @@ -1,20 +1,25 @@ +#include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/ioport.h> #include <linux/interrupt.h> +#include <linux/timex.h> #include <linux/slab.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <linux/init.h> #include <linux/kernel_stat.h> #include <linux/sysdev.h> #include <linux/bitops.h> +#include <linux/acpi.h> #include <linux/io.h> #include <linux/delay.h> #include <asm/atomic.h> #include <asm/system.h> #include <asm/timer.h> +#include <asm/hw_irq.h> #include <asm/pgtable.h> #include <asm/desc.h> #include <asm/apic.h> @@ -22,7 +27,23 @@ #include <asm/i8259.h> #include <asm/traps.h> +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ +#ifdef CONFIG_X86_32 /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 * as the irq is unreliable, and exception 16 works correctly @@ -52,30 +73,7 @@ static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", }; - -void __init init_ISA_irqs(void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); #endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} /* * IRQ2 is cascade interrupt to second interrupt controller @@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) +static void __init init_ISA_irqs(void) { int i; - /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif + init_8259A(0); /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) + * 16 old-style INTA-cycle interrupts: */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + for (i = 0; i < NR_IRQS_LEGACY; i++) { + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); } +} +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) +static void __init smp_intr_init(void) +{ +#ifdef CONFIG_SMP +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -160,16 +166,27 @@ void __init native_init_IRQ(void) /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - /* IPI for single call function */ + /* IPI for generic single function call */ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif +#endif /* CONFIG_SMP */ +} + +static void __init apic_intr_init(void) +{ + smp_intr_init(); + +#ifdef CONFIG_X86_64 + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif -#ifdef CONFIG_X86_LOCAL_APIC +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -179,16 +196,67 @@ void __init native_init_IRQ(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + /* Performance monitoring interrupts: */ +# ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); +# endif + #endif +#ifdef CONFIG_X86_32 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#endif +} + +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ +#ifdef CONFIG_X86_32 + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } +#endif + init_ISA_irqs(); +} + +void __init native_init_IRQ(void) +{ + int i; + + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); + + apic_intr_init(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ + if (!test_bit(i, used_vectors)) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + } if (!acpi_ioapic) setup_irq(2, &irq2); +#ifdef CONFIG_X86_32 /* * Call quirks after call gates are initialised (usually add in * the architecture specific gates): @@ -203,4 +271,5 @@ void __init native_init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); +#endif } diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c deleted file mode 100644 index 8cd10537fd46..000000000000 --- a/arch/x86/kernel/irqinit_64.c +++ /dev/null @@ -1,177 +0,0 @@ -#include <linux/linkage.h> -#include <linux/errno.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sysdev.h> -#include <linux/bitops.h> -#include <linux/acpi.h> -#include <linux/io.h> -#include <linux/delay.h> - -#include <asm/atomic.h> -#include <asm/system.h> -#include <asm/hw_irq.h> -#include <asm/pgtable.h> -#include <asm/desc.h> -#include <asm/apic.h> -#include <asm/i8259.h> - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ - -static struct irqaction irq2 = { - .handler = no_action, - .name = "cascade", -}; -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - -static void __init init_ISA_irqs(void) -{ - int i; - - init_bsp_APIC(); - init_8259A(0); - - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); -#endif -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); - - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -} - -void __init native_init_IRQ(void) -{ - int i; - - init_ISA_irqs(); - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - - apic_intr_init(); - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b1f4dffb919e..8d82a77a3f3b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -142,7 +142,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); gdb_regs32[GDB_CS] = __KERNEL_CS; gdb_regs32[GDB_SS] = __KERNEL_DS; - gdb_regs[GDB_PC] = p->thread.ip; + gdb_regs[GDB_PC] = 0; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33019ddb56b4..6551dedee20c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void) struct kvm_para_state *state = kvm_para_state(); mmu_queue_flush(state); - paravirt_leave_lazy(paravirt_get_lazy_mode()); + paravirt_leave_lazy_mmu(); state->mode = paravirt_get_lazy_mode(); } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 453b5795a5c6..366baa179913 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -13,25 +13,13 @@ * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. */ -#include <linux/platform_device.h> -#include <linux/capability.h> -#include <linux/miscdevice.h> #include <linux/firmware.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> #include <linux/pci_ids.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/cpu.h> #include <linux/pci.h> -#include <linux/fs.h> -#include <linux/mm.h> #include <asm/microcode.h> #include <asm/processor.h> @@ -79,9 +67,6 @@ struct microcode_amd { #define UCODE_CONTAINER_SECTION_HDR 8 #define UCODE_CONTAINER_HEADER_SIZE 12 -/* serialize access to the physical write */ -static DEFINE_SPINLOCK(microcode_update_lock); - static struct equiv_cpu_entry *equiv_cpu_table; static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) @@ -144,9 +129,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) return 1; } -static void apply_microcode_amd(int cpu) +static int apply_microcode_amd(int cpu) { - unsigned long flags; u32 rev, dummy; int cpu_num = raw_smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; @@ -156,25 +140,25 @@ static void apply_microcode_amd(int cpu) BUG_ON(cpu_num != cpu); if (mc_amd == NULL) - return; + return 0; - spin_lock_irqsave(µcode_update_lock, flags); wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); /* get patch id after patching */ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - spin_unlock_irqrestore(µcode_update_lock, flags); /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { printk(KERN_ERR "microcode: CPU%d: update failed " "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); - return; + return -1; } printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); uci->cpu_sig.rev = rev; + + return 0; } static int get_ucode_data(void *to, const u8 *from, size_t n) @@ -257,13 +241,12 @@ static int install_equiv_cpu_table(const u8 *buf) static void free_equiv_cpu_table(void) { - if (equiv_cpu_table) { - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; - } + vfree(equiv_cpu_table); + equiv_cpu_table = NULL; } -static int generic_load_microcode(int cpu, const u8 *data, size_t size) +static enum ucode_state +generic_load_microcode(int cpu, const u8 *data, size_t size) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; const u8 *ucode_ptr = data; @@ -272,12 +255,13 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) int new_rev = uci->cpu_sig.rev; unsigned int leftover; unsigned long offset; + enum ucode_state state = UCODE_OK; offset = install_equiv_cpu_table(ucode_ptr); if (!offset) { printk(KERN_ERR "microcode: failed to create " "equivalent cpu table\n"); - return -EINVAL; + return UCODE_ERROR; } ucode_ptr += offset; @@ -293,8 +277,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) mc_header = (struct microcode_header_amd *)mc; if (get_matching_microcode(cpu, mc, new_rev)) { - if (new_mc) - vfree(new_mc); + vfree(new_mc); new_rev = mc_header->patch_id; new_mc = mc; } else @@ -306,34 +289,32 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) if (new_mc) { if (!leftover) { - if (uci->mc) - vfree(uci->mc); + vfree(uci->mc); uci->mc = new_mc; pr_debug("microcode: CPU%d found a matching microcode " "update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - } else + } else { vfree(new_mc); - } + state = UCODE_ERROR; + } + } else + state = UCODE_NFOUND; free_equiv_cpu_table(); - return (int)leftover; + return state; } -static int request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device) { const char *fw_name = "amd-ucode/microcode_amd.bin"; const struct firmware *firmware; - int ret; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); + enum ucode_state ret; - ret = request_firmware(&firmware, fw_name, device); - if (ret) { + if (request_firmware(&firmware, fw_name, device)) { printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); - return ret; + return UCODE_NFOUND; } ret = generic_load_microcode(cpu, firmware->data, firmware->size); @@ -343,11 +324,12 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } -static int request_microcode_user(int cpu, const void __user *buf, size_t size) +static enum ucode_state +request_microcode_user(int cpu, const void __user *buf, size_t size) { printk(KERN_INFO "microcode: AMD microcode update via " "/dev/cpu/microcode not supported\n"); - return -1; + return UCODE_ERROR; } static void microcode_fini_cpu_amd(int cpu) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 98c470c069d1..9c4461501fcb 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -71,27 +71,18 @@ * Thanks to Stuart Swales for pointing out this bug. */ #include <linux/platform_device.h> -#include <linux/capability.h> #include <linux/miscdevice.h> -#include <linux/firmware.h> +#include <linux/capability.h> #include <linux/smp_lock.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/slab.h> #include <linux/cpu.h> #include <linux/fs.h> #include <linux/mm.h> #include <asm/microcode.h> #include <asm/processor.h> -#include <asm/msr.h> MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -101,36 +92,110 @@ MODULE_LICENSE("GPL"); static struct microcode_ops *microcode_ops; -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ +/* + * Synchronization. + * + * All non cpu-hotplug-callback call sites use: + * + * - microcode_mutex to synchronize with each other; + * - get/put_online_cpus() to synchronize with + * the cpu-hotplug-callback call sites. + * + * We guarantee that only a single cpu is being + * updated at any particular moment of time. + */ static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); +/* + * Operations that are run on a target cpu: + */ + +struct cpu_info_ctx { + struct cpu_signature *cpu_sig; + int err; +}; + +static void collect_cpu_info_local(void *arg) +{ + struct cpu_info_ctx *ctx = arg; + + ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(), + ctx->cpu_sig); +} + +static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig) +{ + struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 }; + int ret; + + ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1); + if (!ret) + ret = ctx.err; + + return ret; +} + +static int collect_cpu_info(int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int ret; + + memset(uci, 0, sizeof(*uci)); + + ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig); + if (!ret) + uci->valid = 1; + + return ret; +} + +struct apply_microcode_ctx { + int err; +}; + +static void apply_microcode_local(void *arg) +{ + struct apply_microcode_ctx *ctx = arg; + + ctx->err = microcode_ops->apply_microcode(smp_processor_id()); +} + +static int apply_microcode_on_target(int cpu) +{ + struct apply_microcode_ctx ctx = { .err = 0 }; + int ret; + + ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); + if (!ret) + ret = ctx.err; + + return ret; +} + #ifdef CONFIG_MICROCODE_OLD_INTERFACE static int do_microcode_update(const void __user *buf, size_t size) { - cpumask_t old; int error = 0; int cpu; - old = current->cpus_allowed; - for_each_online_cpu(cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; if (!uci->valid) continue; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - error = microcode_ops->request_microcode_user(cpu, buf, size); - if (error < 0) - goto out; - if (!error) - microcode_ops->apply_microcode(cpu); + ustate = microcode_ops->request_microcode_user(cpu, buf, size); + if (ustate == UCODE_ERROR) { + error = -1; + break; + } else if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); } -out: - set_cpus_allowed_ptr(current, &old); + return error; } @@ -143,19 +208,17 @@ static int microcode_open(struct inode *unused1, struct file *unused2) static ssize_t microcode_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { - ssize_t ret; + ssize_t ret = -EINVAL; if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", - num_physpages); - return -EINVAL; + pr_err("microcode: too much data (max %ld pages)\n", num_physpages); + return ret; } get_online_cpus(); mutex_lock(µcode_mutex); - ret = do_microcode_update(buf, len); - if (!ret) + if (do_microcode_update(buf, len) == 0) ret = (ssize_t)len; mutex_unlock(µcode_mutex); @@ -165,15 +228,15 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, } static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, + .owner = THIS_MODULE, + .write = microcode_write, + .open = microcode_open, }; static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, + .minor = MICROCODE_MINOR, + .name = "microcode", + .fops = µcode_fops, }; static int __init microcode_dev_init(void) @@ -182,9 +245,7 @@ static int __init microcode_dev_init(void) error = misc_register(µcode_dev); if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); + pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); return error; } @@ -205,42 +266,51 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static long reload_for_cpu(void *unused) +static int reload_for_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; int err = 0; mutex_lock(µcode_mutex); if (uci->valid) { - err = microcode_ops->request_microcode_fw(smp_processor_id(), - µcode_pdev->dev); - if (!err) - microcode_ops->apply_microcode(smp_processor_id()); + enum ucode_state ustate; + + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; } mutex_unlock(µcode_mutex); + return err; } static ssize_t reload_store(struct sys_device *dev, struct sysdev_attribute *attr, - const char *buf, size_t sz) + const char *buf, size_t size) { - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; + unsigned long val; int cpu = dev->id; + int ret = 0; + char *end; + val = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; + if (val == 1) { get_online_cpus(); if (cpu_online(cpu)) - err = work_on_cpu(cpu, reload_for_cpu, NULL); + ret = reload_for_cpu(cpu); put_online_cpus(); } - if (err) - return err; - return sz; + + if (!ret) + ret = size; + + return ret; } static ssize_t version_show(struct sys_device *dev, @@ -271,11 +341,11 @@ static struct attribute *mc_default_attrs[] = { }; static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", + .attrs = mc_default_attrs, + .name = "microcode", }; -static void __microcode_fini_cpu(int cpu) +static void microcode_fini_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -283,103 +353,68 @@ static void __microcode_fini_cpu(int cpu) uci->valid = 0; } -static void microcode_fini_cpu(int cpu) -{ - mutex_lock(µcode_mutex); - __microcode_fini_cpu(cpu); - mutex_unlock(µcode_mutex); -} - -static void collect_cpu_info(int cpu) +static enum ucode_state microcode_resume_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - memset(uci, 0, sizeof(*uci)); - if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) - uci->valid = 1; + if (!uci->mc) + return UCODE_NFOUND; + + pr_debug("microcode: CPU%d updated upon resume\n", cpu); + apply_microcode_on_target(cpu); + + return UCODE_OK; } -static int microcode_resume_cpu(int cpu) +static enum ucode_state microcode_init_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct cpu_signature nsig; + enum ucode_state ustate; - pr_debug("microcode: CPU%d resumed\n", cpu); + if (collect_cpu_info(cpu)) + return UCODE_ERROR; - if (!uci->mc) - return 1; + /* --dimm. Trigger a delayed update? */ + if (system_state != SYSTEM_RUNNING) + return UCODE_NFOUND; - /* - * Let's verify that the 'cached' ucode does belong - * to this cpu (a bit of paranoia): - */ - if (microcode_ops->collect_cpu_info(cpu, &nsig)) { - __microcode_fini_cpu(cpu); - printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n", - cpu); - return -1; - } + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) { - __microcode_fini_cpu(cpu); - printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n", - cpu); - /* Should we look for a new ucode here? */ - return 1; + if (ustate == UCODE_OK) { + pr_debug("microcode: CPU%d updated upon init\n", cpu); + apply_microcode_on_target(cpu); } - return 0; + return ustate; } -static long microcode_update_cpu(void *unused) +static enum ucode_state microcode_update_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); - int err = 0; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; - /* - * Check if the system resume is in progress (uci->valid != NULL), - * otherwise just request a firmware: - */ - if (uci->valid) { - err = microcode_resume_cpu(smp_processor_id()); - } else { - collect_cpu_info(smp_processor_id()); - if (uci->valid && system_state == SYSTEM_RUNNING) - err = microcode_ops->request_microcode_fw( - smp_processor_id(), - µcode_pdev->dev); - } - if (!err) - microcode_ops->apply_microcode(smp_processor_id()); - return err; -} + if (uci->valid) + ustate = microcode_resume_cpu(cpu); + else + ustate = microcode_init_cpu(cpu); -static int microcode_init_cpu(int cpu) -{ - int err; - mutex_lock(µcode_mutex); - err = work_on_cpu(cpu, microcode_update_cpu, NULL); - mutex_unlock(µcode_mutex); - - return err; + return ustate; } static int mc_sysdev_add(struct sys_device *sys_dev) { int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!cpu_online(cpu)) return 0; pr_debug("microcode: CPU%d added\n", cpu); - memset(uci, 0, sizeof(*uci)); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) return err; - err = microcode_init_cpu(cpu); + if (microcode_init_cpu(cpu) == UCODE_ERROR) + err = -EINVAL; return err; } @@ -400,19 +435,30 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) static int mc_sysdev_resume(struct sys_device *dev) { int cpu = dev->id; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!cpu_online(cpu)) return 0; - /* only CPU 0 will apply ucode here */ - microcode_update_cpu(NULL); + /* + * All non-bootup cpus are still disabled, + * so only CPU 0 will apply ucode here. + * + * Moreover, there can be no concurrent + * updates from any other places at this point. + */ + WARN_ON(cpu != 0); + + if (uci->valid && uci->mc) + microcode_ops->apply_microcode(cpu); + return 0; } static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, + .add = mc_sysdev_add, + .remove = mc_sysdev_remove, + .resume = mc_sysdev_resume, }; static __cpuinit int @@ -425,15 +471,12 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - if (microcode_init_cpu(cpu)) - printk(KERN_ERR "microcode: failed to init CPU%d\n", - cpu); + microcode_update_cpu(cpu); case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: pr_debug("microcode: CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); + pr_err("microcode: Failed to create group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: @@ -465,13 +508,10 @@ static int __init microcode_init(void) microcode_ops = init_amd_microcode(); if (!microcode_ops) { - printk(KERN_ERR "microcode: no support for this CPU vendor\n"); + pr_err("microcode: no support for this CPU vendor\n"); return -ENODEV; } - error = microcode_dev_init(); - if (error) - return error; microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); if (IS_ERR(microcode_pdev)) { @@ -480,23 +520,31 @@ static int __init microcode_init(void) } get_online_cpus(); + mutex_lock(µcode_mutex); + error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); + + mutex_unlock(µcode_mutex); put_online_cpus(); + if (error) { - microcode_dev_exit(); platform_device_unregister(microcode_pdev); return error; } + error = microcode_dev_init(); + if (error) + return error; + register_hotcpu_notifier(&mc_cpu_notifier); - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION + pr_info("Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>," " Peter Oruba\n"); return 0; } +module_init(microcode_init); static void __exit microcode_exit(void) { @@ -505,16 +553,17 @@ static void __exit microcode_exit(void) unregister_hotcpu_notifier(&mc_cpu_notifier); get_online_cpus(); + mutex_lock(µcode_mutex); + sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); + + mutex_unlock(µcode_mutex); put_online_cpus(); platform_device_unregister(microcode_pdev); microcode_ops = NULL; - printk(KERN_INFO - "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); + pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); } - -module_init(microcode_init); module_exit(microcode_exit); diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 149b9ec7c1ab..0d334ddd0a96 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,24 +70,11 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ -#include <linux/platform_device.h> -#include <linux/capability.h> -#include <linux/miscdevice.h> #include <linux/firmware.h> -#include <linux/smp_lock.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> #include <linux/uaccess.h> -#include <linux/vmalloc.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/cpu.h> -#include <linux/fs.h> -#include <linux/mm.h> +#include <linux/vmalloc.h> #include <asm/microcode.h> #include <asm/processor.h> @@ -150,13 +137,9 @@ struct extended_sigtable { #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); - unsigned long flags; unsigned int val[2]; memset(csig, 0, sizeof(*csig)); @@ -176,18 +159,14 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) csig->pf = 1 << ((val[1] >> 18) & 7); } - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* see notes above for revision 1.07. Apparent chip bug */ sync_core(); /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); - spin_unlock_irqrestore(µcode_update_lock, flags); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - csig->sig, csig->pf, csig->rev); + printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", + cpu_num, csig->sig, csig->pf, csig->rev); return 0; } @@ -318,11 +297,10 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) return 0; } -static void apply_microcode(int cpu) +static int apply_microcode(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; - unsigned long flags; unsigned int val[2]; int cpu_num; @@ -334,10 +312,7 @@ static void apply_microcode(int cpu) BUG_ON(cpu_num != cpu); if (mc_intel == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); + return 0; /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, @@ -351,30 +326,32 @@ static void apply_microcode(int cpu) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - spin_unlock_irqrestore(µcode_update_lock, flags); if (val[1] != mc_intel->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update from revision " - "0x%x to 0x%x failed\n", - cpu_num, uci->cpu_sig.rev, val[1]); - return; + printk(KERN_ERR "microcode: CPU%d update " + "to revision 0x%x failed\n", + cpu_num, mc_intel->hdr.rev); + return -1; } - printk(KERN_INFO "microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %04x-%02x-%02x \n", - cpu_num, uci->cpu_sig.rev, val[1], + printk(KERN_INFO "microcode: CPU%d updated to revision " + "0x%x, date = %04x-%02x-%02x \n", + cpu_num, val[1], mc_intel->hdr.date & 0xffff, mc_intel->hdr.date >> 24, (mc_intel->hdr.date >> 16) & 0xff); uci->cpu_sig.rev = val[1]; + + return 0; } -static int generic_load_microcode(int cpu, void *data, size_t size, - int (*get_ucode_data)(void *, const void *, size_t)) +static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, + int (*get_ucode_data)(void *, const void *, size_t)) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; u8 *ucode_ptr = data, *new_mc = NULL, *mc; int new_rev = uci->cpu_sig.rev; unsigned int leftover = size; + enum ucode_state state = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -412,11 +389,15 @@ static int generic_load_microcode(int cpu, void *data, size_t size, leftover -= mc_size; } - if (!new_mc) + if (leftover) { + if (new_mc) + vfree(new_mc); + state = UCODE_ERROR; goto out; + } - if (leftover) { - vfree(new_mc); + if (!new_mc) { + state = UCODE_NFOUND; goto out; } @@ -427,9 +408,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, pr_debug("microcode: CPU%d found a matching microcode update with" " version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - - out: - return (int)leftover; +out: + return state; } static int get_ucode_fw(void *to, const void *from, size_t n) @@ -438,21 +418,19 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } -static int request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; - int ret; + enum ucode_state ret; - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, c->x86_model, c->x86_mask); - ret = request_firmware(&firmware, name, device); - if (ret) { + + if (request_firmware(&firmware, name, device)) { pr_debug("microcode: data file %s load failed\n", name); - return ret; + return UCODE_NFOUND; } ret = generic_load_microcode(cpu, (void *)firmware->data, @@ -468,11 +446,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) return copy_from_user(to, from, n); } -static int request_microcode_user(int cpu, const void __user *buf, size_t size) +static enum ucode_state +request_microcode_user(int cpu, const void __user *buf, size_t size) { - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 70fd7e414c15..651c93b28862 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -17,6 +17,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/smp.h> +#include <linux/pci.h> #include <asm/mtrr.h> #include <asm/mpspec.h> @@ -870,24 +871,17 @@ static inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} #endif /* CONFIG_X86_IO_APIC */ -static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, - int count) +static int +check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) { - if (!mpc_new_phys) { - pr_info("No spare slots, try to append...take your risk, " - "new mpc_length %x\n", count); - } else { - if (count <= mpc_new_length) - pr_info("No spare slots, try to append..., " - "new mpc_length %x\n", count); - else { - pr_err("mpc_new_length %lx is too small\n", - mpc_new_length); - return -1; - } + int ret = 0; + + if (!mpc_new_phys || count <= mpc_new_length) { + WARN(1, "update_mptable: No spare slots (length: %x)\n", count); + return -1; } - return 0; + return ret; } static int __init replace_intsrc_all(struct mpc_table *mpc, @@ -946,7 +940,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, } else { struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; count += sizeof(struct mpc_intsrc); - if (!check_slot(mpc_new_phys, mpc_new_length, count)) + if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) goto out; assign_to_mpc_intsrc(&mp_irqs[i], m); mpc->length = count; @@ -963,11 +957,14 @@ out: return 0; } -static int __initdata enable_update_mptable; +int enable_update_mptable; static int __init update_mptable_setup(char *str) { enable_update_mptable = 1; +#ifdef CONFIG_PCI + pci_routeirq = 1; +#endif return 0; } early_param("update_mptable", update_mptable_setup); @@ -980,6 +977,9 @@ static int __initdata alloc_mptable; static int __init parse_alloc_mptable_opt(char *p) { enable_update_mptable = 1; +#ifdef CONFIG_PCI + pci_routeirq = 1; +#endif alloc_mptable = 1; if (!p) return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8e45f4464880..70ec9b951d76 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -134,7 +134,9 @@ static void *get_call_destination(u8 type) .pv_irq_ops = pv_irq_ops, .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, +#ifdef CONFIG_PARAVIRT_SPINLOCKS .pv_lock_ops = pv_lock_ops, +#endif }; return *((void **)&tmpl + type); } @@ -246,18 +248,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA static inline void enter_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - BUG_ON(preemptible()); + BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - __get_cpu_var(paravirt_lazy_mode) = mode; + percpu_write(paravirt_lazy_mode, mode); } -void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +static void leave_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); - BUG_ON(preemptible()); + BUG_ON(percpu_read(paravirt_lazy_mode) != mode); - __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); } void paravirt_enter_lazy_mmu(void) @@ -267,22 +267,36 @@ void paravirt_enter_lazy_mmu(void) void paravirt_leave_lazy_mmu(void) { - paravirt_leave_lazy(PARAVIRT_LAZY_MMU); + leave_lazy(PARAVIRT_LAZY_MMU); } -void paravirt_enter_lazy_cpu(void) +void paravirt_start_context_switch(struct task_struct *prev) { + BUG_ON(preemptible()); + + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); + } enter_lazy(PARAVIRT_LAZY_CPU); } -void paravirt_leave_lazy_cpu(void) +void paravirt_end_context_switch(struct task_struct *next) { - paravirt_leave_lazy(PARAVIRT_LAZY_CPU); + BUG_ON(preemptible()); + + leave_lazy(PARAVIRT_LAZY_CPU); + + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } enum paravirt_lazy_mode paravirt_get_lazy_mode(void) { - return __get_cpu_var(paravirt_lazy_mode); + if (in_interrupt()) + return PARAVIRT_LAZY_NONE; + + return percpu_read(paravirt_lazy_mode); } void arch_flush_lazy_mmu_mode(void) @@ -290,7 +304,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } @@ -298,19 +311,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_enable(); } -void arch_flush_lazy_cpu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { - WARN_ON(preempt_count() == 1); - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -402,10 +402,8 @@ struct pv_cpu_ops pv_cpu_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .lazy_mode = { - .enter = paravirt_nop, - .leave = paravirt_nop, - }, + .start_context_switch = paravirt_nop, + .end_context_switch = paravirt_nop, }; struct pv_apic_ops pv_apic_ops = { diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 755c21e906f3..971a3bec47a8 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = { static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; -/* enable this to stress test the chip's TCE cache */ -#ifdef CONFIG_IOMMU_DEBUG -static int debugging = 1; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - unsigned long idx = start; - - BUG_ON(start >= end); - - while (idx < end) { - if (!!test_bit(idx, bitmap) != expected) - return idx; - ++idx; - } - - /* all bits have the expected value */ - return ~0UL; -} -#else /* debugging is disabled */ -static int debugging; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - return ~0UL; -} - -#endif /* CONFIG_IOMMU_DEBUG */ - static inline int translation_enabled(struct iommu_table *tbl) { /* only PHBs with translation enabled have an IOMMU table */ @@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, { unsigned long index; unsigned long end; - unsigned long badbit; unsigned long flags; index = start_addr >> PAGE_SHIFT; @@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 0, index, end); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: entry already allocated at " - "0x%lx tbl %p dma 0x%lx npages %u\n", - badbit, tbl, start_addr, npages); - } - iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { unsigned long entry; - unsigned long badbit; unsigned long badend; unsigned long flags; @@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: bit is off at 0x%lx " - "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", - badbit, tbl, dma_addr, entry, npages); - } - iommu_area_free(tbl->it_map, entry, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -1488,9 +1439,8 @@ void __init detect_calgary(void) iommu_detected = 1; calgary_detected = 1; printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " - "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, - debugging ? "enabled" : "disabled"); + printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", + specified_table_size); /* swiotlb for devices that aren't behind the Calgary. */ if (max_pfn > MAX_DMA32_PFN) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b284b58c035c..cfd9f9063896 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -144,48 +144,21 @@ static void flush_gart(void) } #ifdef CONFIG_IOMMU_LEAK - -#define SET_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0);\ - } while (0) - -#define CLEAR_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; \ - } while (0) - /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; static void dump_leak(void) { - int i; static int dump; - if (dump || !iommu_leak_tab) + if (dump) return; dump = 1; - show_stack(NULL, NULL); - /* Very crude. dump some from the end of the table too */ - printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", - iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i += 2) { - printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], - 0); - printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk(KERN_DEBUG "\n"); + show_stack(NULL, NULL); + debug_dma_dump_mappings(NULL); } -#else -# define SET_LEAK(x) -# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) @@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - SET_LEAK(iommu_page + i); phys_mem += PAGE_SIZE; } return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); @@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - CLEAR_LEAK(iommu_page + i); } free_iommu(iommu_page, npages); } @@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } @@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) agp_gatt_table = gatt; - enable_gart_translations(); - error = sysdev_class_register(&gart_sysdev_class); if (!error) error = sysdev_register(&device_gart); @@ -801,11 +769,12 @@ void __init gart_iommu_init(void) #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - get_order(iommu_pages*sizeof(void *))); - if (!iommu_leak_tab) + int ret; + + ret = dma_debug_resize_entries(iommu_pages); + if (ret) printk(KERN_DEBUG - "PCI-DMA: Cannot allocate leak trace area\n"); + "PCI-DMA: Cannot trace all the entries\n"); } #endif @@ -845,6 +814,14 @@ void __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); + + /* + * Now all caches are flushed and we can safely enable + * GART hardware. Doing it early leaves the possibility + * of stale cache entries that can lead to GART PTE + * errors. + */ + enable_gart_translations(); /* * Try to workaround a bug (thanks to BenH): diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 221a3853e268..a1712f2b50f1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fb5dfb891f0f..3bb2be1649bd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,9 +8,11 @@ #include <linux/module.h> #include <linux/pm.h> #include <linux/clockchips.h> +#include <linux/random.h> #include <trace/power.h> #include <asm/system.h> #include <asm/apic.h> +#include <asm/syscalls.h> #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/i387.h> @@ -614,3 +616,16 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() % 8192; + return sp & ~0xf; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} + diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5e4bfef4472..59f4524984af 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -9,8 +9,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include <stdarg.h> - #include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> @@ -33,7 +31,6 @@ #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/ptrace.h> -#include <linux/random.h> #include <linux/personality.h> #include <linux/tick.h> #include <linux/percpu.h> @@ -408,7 +405,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(next_p); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the @@ -498,15 +495,3 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; -} diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5a1a1de292ec..ebefb5407b9d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -14,8 +14,6 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include <stdarg.h> - #include <linux/stackprotector.h> #include <linux/cpu.h> #include <linux/errno.h> @@ -32,7 +30,6 @@ #include <linux/delay.h> #include <linux/module.h> #include <linux/ptrace.h> -#include <linux/random.h> #include <linux/notifier.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -429,7 +426,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(next_p); /* * Switch FS and GS. @@ -661,15 +658,3 @@ long sys_arch_prctl(int code, unsigned long addr) return do_arch_prctl(current, code, addr); } -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? : mm->brk; -} diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 7563b31b4f03..af71d06624bf 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -491,5 +491,42 @@ void force_hpet_resume(void) break; } } +#endif + +#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) +/* Set correct numa_node information for AMD NB functions */ +static void __init quirk_amd_nb_node(struct pci_dev *dev) +{ + struct pci_dev *nb_ht; + unsigned int devfn; + u32 val; + + devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); + nb_ht = pci_get_slot(dev->bus, devfn); + if (!nb_ht) + return; + + pci_read_config_dword(nb_ht, 0x60, &val); + set_dev_node(&dev->dev, val & 7); + pci_dev_put(dev); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, + quirk_amd_nb_node); #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 1340dad417f4..d2d1ce8170f0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -192,6 +192,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KP561"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"), + DMI_MATCH(DMI_BOARD_NAME, "0T656F"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", @@ -232,6 +241,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"), }, }, + { /* Handle problems with rebooting on Sony VGN-Z540N */ + .callback = set_bios_reboot, + .ident = "Sony VGN-Z540N", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4158439bf63..d1c636bf31a7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,14 @@ #define ARCH_SETUP #endif +/* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access + * apertures, ACPI and other tables without having to play with fixmaps. + */ +unsigned long max_low_pfn_mapped; +unsigned long max_pfn_mapped; + RESERVE_BRK(dmi_alloc, 65536); unsigned int boot_cpu_id __read_mostly; @@ -214,8 +222,8 @@ unsigned long mmu_cr4_features; unsigned long mmu_cr4_features = X86_CR4_PAE; #endif -/* Boot loader ID as an integer, for the benefit of proc_dointvec */ -int bootloader_type; +/* Boot loader ID and version as integers, for the benefit of proc_dointvec */ +int bootloader_type, bootloader_version; /* * Setup options @@ -706,6 +714,12 @@ void __init setup_arch(char **cmdline_p) #endif saved_video_mode = boot_params.hdr.vid_mode; bootloader_type = boot_params.hdr.type_of_loader; + if ((bootloader_type >> 4) == 0xe) { + bootloader_type &= 0xf; + bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; + } + bootloader_version = bootloader_type & 0xf; + bootloader_version |= boot_params.hdr.ext_loader_ver << 4; #ifdef CONFIG_BLK_DEV_RAM rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; @@ -854,12 +868,16 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = max_pfn; high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; + max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION setup_bios_corruption_check(); #endif + printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", + max_pfn_mapped<<PAGE_SHIFT); + reserve_brk(); /* max_pfn_mapped is updated here */ @@ -997,24 +1015,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** * x86_quirk_intr_init - post gate setup interrupt initialisation * * Description: diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 3a97a4cf1872..9c3f0823e6aa 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -160,8 +160,10 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) /* * If large page isn't supported, there's no benefit in doing * this. Also, on non-NUMA, embedding is better. + * + * NOTE: disabled for now. */ - if (!cpu_has_pse || !pcpu_need_numa()) + if (true || !cpu_has_pse || !pcpu_need_numa()) return -EINVAL; /* @@ -423,6 +425,14 @@ void __init setup_per_cpu_areas(void) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) + /* + * make sure boot cpu node_number is right, when boot cpu is on the + * node that doesn't have mem installed + */ + per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); +#endif + /* Setup node to cpumask map */ setup_node_to_cpumask_map(); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 13f33ea8ccaa..f6db48c405b8 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) } struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .smp_cpus_done = native_smp_cpus_done, + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .smp_cpus_done = native_smp_cpus_done, - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, + .smp_send_stop = native_smp_send_stop, + .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, - .cpu_disable = native_cpu_disable, - .play_dead = native_play_dead, + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, - .send_call_func_ipi = native_send_call_func_ipi, + .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef917d8..7c80007ea5f7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -int __devinit +int __cpuinit wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; @@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } -int __devinit +static int __cpuinit wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; @@ -822,10 +822,12 @@ do_rest: /* mark "stuck" area as not stuck */ *((volatile unsigned long *)trampoline_base) = 0; - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + } return boot_error; } @@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation." + if (!disable_apic) { + pr_err("BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + pr_err("... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); + } smpboot_clear_io_apic(); arch_disable_smp_support(); return -1; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..734f92c02dde 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -334,3 +334,4 @@ ENTRY(sys_call_table) .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_rt_tgsigqueueinfo /* 335 */ diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index ed0c33761e6d..124d40c575df 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -715,7 +715,12 @@ uv_activation_descriptor_init(int node, int pnode) struct bau_desc *adp; struct bau_desc *ad2; - adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); + /* + * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) + * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade + */ + adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* + UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); BUG_ON(!adp); pa = uv_gpa(adp); /* need the real nasid*/ @@ -729,7 +734,13 @@ uv_activation_descriptor_init(int node, int pnode) (n << UV_DESC_BASE_PNODE_SHIFT | m)); } - for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { + /* + * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * cpu even though we only use the first one; one descriptor can + * describe a broadcast to 256 nodes. + */ + for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); + i++, ad2++) { memset(ad2, 0, sizeof(struct bau_desc)); ad2->header.sw_ack_flag = 1; /* @@ -832,7 +843,7 @@ static int __init uv_bau_init(void) return 0; for_each_possible_cpu(cur_cpu) - alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), + zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), GFP_KERNEL, cpu_to_node(cur_cpu)); uv_bau_retry_limit = 1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..ede024531f8f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -839,9 +839,6 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ -#ifdef CONFIG_X86_32 - restore_fpu(tsk); -#else /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ @@ -850,7 +847,7 @@ asmlinkage void math_state_restore(void) force_sig(SIGSEGV, tsk); return; } -#endif + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } @@ -969,11 +966,8 @@ void __init trap_init(void) for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else - set_bit(SYSCALL_VECTOR, used_vectors); -#endif + /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d57de05dc430..3e1c057e98fe 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate, tsc_khz; + unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; int hpet = is_hpet_enabled(), i, loopmin; - tsc_khz = get_hypervisor_tsc_freq(); - if (tsc_khz) { + hv_tsc_khz = get_hypervisor_tsc_freq(); + if (hv_tsc_khz) { printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); - return tsc_khz; + return hv_tsc_khz; } local_irq_save(flags); @@ -710,7 +710,16 @@ static cycle_t read_tsc(struct clocksource *cs) #ifdef CONFIG_X86_64 static cycle_t __vsyscall_fn vread_tsc(void) { - cycle_t ret = (cycle_t)vget_cycles(); + cycle_t ret; + + /* + * Surround the RDTSC by barriers, to make sure it's not + * speculated to outside the seqlock critical section and + * does not cause time warps: + */ + rdtsc_barrier(); + ret = (cycle_t)vget_cycles(); + rdtsc_barrier(); return ret >= __vsyscall_gtod_data.clock.cycle_last ? ret : __vsyscall_gtod_data.clock.cycle_last; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index bf36328f6ef9..027b5b498993 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count; * of a critical section, to be able to prove TSC time-warps: */ static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; + static __cpuinitdata cycles_t last_tsc; static __cpuinitdata cycles_t max_warp; static __cpuinitdata int nr_warps; @@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk(KERN_INFO - "Skipping synchronization checks as TSC is reliable.\n"); + pr_info("Skipping synchronization checks as TSC is reliable.\n"); return; } - printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); + pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", + smp_processor_id(), cpu); /* * Reset it - in case this is a second bootup: @@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu) if (nr_warps) { printk("\n"); - printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," - " turning off TSC clock.\n", max_warp); + pr_warning("Measured %Ld cycles TSC warp between CPUs, " + "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { printk(" passed.\n"); @@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void) while (atomic_read(&stop_count) != cpus) cpu_relax(); } -#undef NR_LOOPS - diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index d7ac84e7fc1c..9c4e62539058 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -287,10 +287,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs.pt.ds = 0; info->regs.pt.es = 0; info->regs.pt.fs = 0; - -/* we are clearing gs later just before "jmp resume_userspace", - * because it is not saved/restored. - */ +#ifndef CONFIG_X86_32_LAZY_GS + info->regs.pt.gs = 0; +#endif /* * The flags register is also special: we cannot trust that the user @@ -318,9 +317,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk } /* - * Save old state, set default return value (%ax) to 0 + * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) */ - info->regs32->ax = 0; + info->regs32->ax = VM86_SIGNAL; tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_gs = get_user_gs(info->regs32); @@ -343,7 +342,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk __asm__ __volatile__( "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" +#ifdef CONFIG_X86_32_LAZY_GS "mov %2, %%gs\n\t" +#endif "jmp resume_userspace" : /* no outputs */ :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95deb9f2211e..b263423fbe2a 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_enter_lazy_cpu(void) +static void vmi_start_context_switch(struct task_struct *prev) { - paravirt_enter_lazy_cpu(); + paravirt_start_context_switch(prev); vmi_ops.set_lazy_mode(2); } +static void vmi_end_context_switch(struct task_struct *next) +{ + vmi_ops.set_lazy_mode(0); + paravirt_end_context_switch(next); +} + static void vmi_enter_lazy_mmu(void) { paravirt_enter_lazy_mmu(); vmi_ops.set_lazy_mode(1); } -static void vmi_leave_lazy(void) +static void vmi_leave_lazy_mmu(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); vmi_ops.set_lazy_mode(0); + paravirt_leave_lazy_mmu(); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -711,14 +717,14 @@ static inline int __init activate_vmi(void) para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.io_delay, IODelay); - para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, set_lazy_mode, SetLazyMode); - para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 849ee611f013..4c85b2e2bb65 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -1,5 +1,431 @@ +/* + * ld script for the x86 kernel + * + * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * Modernisation, unification and other changes and fixes: + * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org> + * + * + * Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. + */ + #ifdef CONFIG_X86_32 -# include "vmlinux_32.lds.S" +#define LOAD_OFFSET __PAGE_OFFSET #else -# include "vmlinux_64.lds.S" +#define LOAD_OFFSET __START_KERNEL_map #endif + +#include <asm-generic/vmlinux.lds.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page_types.h> +#include <asm/cache.h> +#include <asm/boot.h> + +#undef i386 /* in case the preprocessor is a 32bit one */ + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#ifdef CONFIG_X86_32 +OUTPUT_ARCH(i386) +ENTRY(phys_startup_32) +jiffies = jiffies_64; +#else +OUTPUT_ARCH(i386:x86-64) +ENTRY(phys_startup_64) +jiffies_64 = jiffies; +#endif + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_X86_64 + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ +#endif + note PT_NOTE FLAGS(0); /* ___ */ +} + +SECTIONS +{ +#ifdef CONFIG_X86_32 + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; + phys_startup_32 = startup_32 - LOAD_OFFSET; +#else + . = __START_KERNEL; + phys_startup_64 = startup_64 - LOAD_OFFSET; +#endif + + /* Text and read-only data */ + + /* bootstrapping code */ + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { + _text = .; + *(.text.head) + } :text = 0x9090 + + /* The rest of the text */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { +#ifdef CONFIG_X86_32 + /* not really needed, already page aligned */ + . = ALIGN(PAGE_SIZE); + *(.text.page_aligned) +#endif + . = ALIGN(8); + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + /* End of text section */ + _etext = .; + } :text = 0x9090 + + NOTES :text :note + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } :text = 0x9090 + + RODATA + + /* Data */ + . = ALIGN(PAGE_SIZE); + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + +#ifdef CONFIG_X86_64 + /* End of data section */ + _edata = .; +#endif + } :data + +#ifdef CONFIG_X86_32 + /* 32 bit has nosave before _edata */ + . = ALIGN(PAGE_SIZE); + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } +#endif + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) + *(.data.idt) + } + +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); +#endif + .data.cacheline_aligned : + AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); +#endif + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + +#ifdef CONFIG_X86_32 + /* End of data section */ + _edata = .; +#endif + } + +#ifdef CONFIG_X86_64 + +#define VSYSCALL_ADDR (-10*1024*1024) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) + +#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) + +#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { + *(.vsyscall_0) + } :user + + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { + *(.vsyscall_fn) + } + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { + *(.vsyscall_gtod_data) + } + + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { + *(.vsyscall_clock) + } + vsyscall_clock = VVIRT(.vsyscall_clock); + + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { + *(.vsyscall_1) + } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { + *(.vsyscall_2) + } + + .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { + *(.vgetcpu_mode) + } + vgetcpu_mode = VVIRT(.vgetcpu_mode); + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .jiffies : AT(VLOAD(.jiffies)) { + *(.jiffies) + } + jiffies = VVIRT(.jiffies); + + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { + *(.vsyscall_3) + } + + . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; + +#undef VSYSCALL_ADDR +#undef VSYSCALL_PHYS_ADDR +#undef VSYSCALL_VIRT_ADDR +#undef VLOAD_OFFSET +#undef VLOAD +#undef VVIRT_OFFSET +#undef VVIRT + +#endif /* CONFIG_X86_64 */ + + /* init_task */ + . = ALIGN(THREAD_SIZE); + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } +#ifdef CONFIG_X86_64 + :data.init +#endif + + /* + * smp_locks might be freed after init + * start/end must be page aligned + */ + . = ALIGN(PAGE_SIZE); + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + } + + /* Init code and data - will be freed after init */ + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; /* paired with __init_end */ + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } + + . = ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; + } + + SECURITY_INIT + + . = ALIGN(8); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __parainstructions = .; + *(.parainstructions) + __parainstructions_end = .; + } + + . = ALIGN(8); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } + +#ifdef CONFIG_BLK_DEV_INITRD + . = ALIGN(PAGE_SIZE); + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * start another section data.init2. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else + PERCPU(PAGE_SIZE) +#endif + + . = ALIGN(PAGE_SIZE); + + /* freed after init ends here */ + .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { + __init_end = .; + } + +#ifdef CONFIG_X86_64 + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } :data.init2 + /* use another section data.init2, see PERCPU_VADDR() above */ +#endif + + /* BSS */ + . = ALIGN(PAGE_SIZE); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss.page_aligned) + *(.bss) + . = ALIGN(4); + __bss_stop = .; + } + + . = ALIGN(PAGE_SIZE); + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + __brk_base = .; + . += 64 * 1024; /* 64k alignment slop space */ + *(.brk_reservation) /* areas brk users have reserved */ + __brk_limit = .; + } + + .end : AT(ADDR(.end) - LOAD_OFFSET) { + _end = .; + } + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + *(.eh_frame) + *(.discard) + } + + STABS_DEBUG + DWARF_DEBUG +} + + +#ifdef CONFIG_X86_32 +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") +#else +/* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + +/* + * Build-time check on the image size: + */ +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif + +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_KEXEC +#include <asm/kexec.h> + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif + diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S deleted file mode 100644 index 62ad500d55f3..000000000000 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ /dev/null @@ -1,229 +0,0 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; - * - * Don't define absolute symbols until and unless you know that symbol - * value is should remain constant even if kernel image is relocated - * at run time. Absolute symbols are not relocated. If symbol value should - * change if kernel is relocated, make the symbol section relative and - * put it inside the section definition. - */ - -#define LOAD_OFFSET __PAGE_OFFSET - -#include <asm-generic/vmlinux.lds.h> -#include <asm/thread_info.h> -#include <asm/page_types.h> -#include <asm/cache.h> -#include <asm/boot.h> - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(phys_startup_32) -jiffies = jiffies_64; - -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} -SECTIONS -{ - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; - - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - *(.text.head) - } :text = 0x9090 - - /* read-only */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ - *(.text.page_aligned) - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - /* writeable */ - . = ALIGN(PAGE_SIZE); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - DATA_DATA - CONSTRUCTORS - } :data - - . = ALIGN(PAGE_SIZE); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) - *(.data.idt) - } - - . = ALIGN(32); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - _edata = .; /* End of data section */ - } - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } - - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - } - /* will be freed after init - * Following ALIGN() is required to make sure no other data falls on the - * same page where __smp_alt_end is pointing as that page might be freed - * after boot. Always make sure that ALIGN() directive is present after - * the section which contains __smp_alt_end. - */ - . = ALIGN(PAGE_SIZE); - - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); /* Init code and data */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - . = ALIGN(4); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } -#if defined(CONFIG_BLK_DEV_INITRD) - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - PERCPU(PAGE_SIZE) - . = ALIGN(PAGE_SIZE); - /* freed after init ends here */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __init_end = .; - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - . = ALIGN(4); - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } - - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = . ; - } - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - - DWARF_DEBUG -} - -/* - * Build-time check on the image size: - */ -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_KEXEC -/* Link time checks */ -#include <asm/kexec.h> - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S deleted file mode 100644 index c8742507b030..000000000000 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ /dev/null @@ -1,298 +0,0 @@ -/* ld script to make x86-64 Linux kernel - * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; - */ - -#define LOAD_OFFSET __START_KERNEL_map - -#include <asm-generic/vmlinux.lds.h> -#include <asm/asm-offsets.h> -#include <asm/page_types.h> - -#undef i386 /* in case the preprocessor is a 32bit one */ - -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(phys_startup_64) -jiffies_64 = jiffies; -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - user PT_LOAD FLAGS(7); /* RWE */ - data.init PT_LOAD FLAGS(7); /* RWE */ -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(7); /* RWE */ -#endif - data.init2 PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} -SECTIONS -{ - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - /* First the code that has to be first for bootstrapping */ - *(.text.head) - _stext = .; - /* Then the rest */ - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - CONSTRUCTORS - _edata = .; /* End of data section */ - } :data - - - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - *(.data.cacheline_aligned) - } - . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - } - -#define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) - -#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) -#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) - -#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) -#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - - . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user - __vsyscall_0 = VSYSCALL_VIRT_ADDR; - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) - { *(.vsyscall_gtod_data) } - vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); - .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) - { *(.vsyscall_clock) } - vsyscall_clock = VVIRT(.vsyscall_clock); - - - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) - { *(.vsyscall_1) } - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) - { *(.vsyscall_2) } - - .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } - vgetcpu_mode = VVIRT(.vgetcpu_mode); - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } - jiffies = VVIRT(.jiffies); - - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) - { *(.vsyscall_3) } - - . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; - -#undef VSYSCALL_ADDR -#undef VSYSCALL_PHYS_ADDR -#undef VSYSCALL_VIRT_ADDR -#undef VLOAD_OFFSET -#undef VLOAD -#undef VVIRT_OFFSET -#undef VVIRT - - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - . = ALIGN(THREAD_SIZE); /* init_task */ - *(.data.init_task) - }:data.init - - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - *(.data.page_aligned) - } - - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - __smp_alt_begin = .; - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - . = ALIGN(PAGE_SIZE); - __smp_alt_end = .; - } - - . = ALIGN(PAGE_SIZE); /* Init code and data */ - __init_begin = .; /* paired with __init_end */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - __initdata_begin = .; - INIT_DATA - __initdata_end = .; - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - . = ALIGN(16); - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - . = ALIGN(8); - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - -#ifdef CONFIG_SMP - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - __data_nosave - should - * start another section data.init2. Also, pda should be at the head of - * percpu area. Preallocate it and define the percpu offset symbol - * so that it can be accessed as a percpu variable. - */ - . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) -#else - PERCPU(PAGE_SIZE) -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - - STABS_DEBUG - - DWARF_DEBUG -} - - /* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); - -/* - * Build-time check on the image size: - */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); -#endif - -#ifdef CONFIG_KEXEC -#include <asm/kexec.h> - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 44153afc9067..25ee06a80aad 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -132,15 +132,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) return; } - /* - * Surround the RDTSC by barriers, to make sure it's not - * speculated to outside the seqlock critical section and - * does not cause time warps: - */ - rdtsc_barrier(); now = vread(); - rdtsc_barrier(); - base = __vsyscall_gtod_data.clock.cycle_last; mask = __vsyscall_gtod_data.clock.mask; mult = __vsyscall_gtod_data.clock.mult; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b6caf1329b1b..32cf11e5728a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2897,8 +2897,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - kvm_x86_ops->tlb_flush(vcpu); - set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); + kvm_set_cr3(vcpu, vcpu->arch.cr3); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 49079a46687b..3944e917e794 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -338,6 +338,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + unsigned long old_cr4 = vcpu->arch.cr4; + unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); @@ -351,7 +354,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_inject_gp(vcpu, 0); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) + } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) + && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile index 27f0c9ed7f60..94e0e54056a9 100644 --- a/arch/x86/lguest/Makefile +++ b/arch/x86/lguest/Makefile @@ -1 +1,2 @@ obj-y := i386_head.o boot.o +CFLAGS_boot.o := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ca7ec44bafc3..4e0c26559395 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -67,6 +67,7 @@ #include <asm/mce.h> #include <asm/io.h> #include <asm/i387.h> +#include <asm/stackprotector.h> #include <asm/reboot.h> /* for struct machine_ops */ /*G:010 Welcome to the Guest! @@ -166,10 +167,16 @@ static void lazy_hcall3(unsigned long call, /* When lazy mode is turned off reset the per-cpu lazy mode variable and then * issue the do-nothing hypercall to flush any stored calls. */ -static void lguest_leave_lazy_mode(void) +static void lguest_leave_lazy_mmu_mode(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); kvm_hypercall0(LHCALL_FLUSH_ASYNC); + paravirt_leave_lazy_mmu(); +} + +static void lguest_end_context_switch(struct task_struct *next) +{ + kvm_hypercall0(LHCALL_FLUSH_ASYNC); + paravirt_end_context_switch(next); } /*G:033 @@ -636,7 +643,7 @@ static void __init lguest_init_IRQ(void) void lguest_setup_irq(unsigned int irq) { - irq_to_desc_alloc_cpu(irq, 0); + irq_to_desc_alloc_node(irq, 0); set_irq_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); } @@ -1053,8 +1060,8 @@ __init void lguest_init(void) pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; - pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_cpu_ops.start_context_switch = paravirt_start_context_switch; + pv_cpu_ops.end_context_switch = lguest_end_context_switch; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; @@ -1067,7 +1074,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; - pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; pv_mmu_ops.pte_update = lguest_pte_update; pv_mmu_ops.pte_update_defer = lguest_pte_update; @@ -1088,13 +1095,21 @@ __init void lguest_init(void) * lguest_init() where the rest of the fairly chaotic boot setup * occurs. */ + /* The stack protector is a weird thing where gcc places a canary + * value on the stack and then checks it on return. This file is + * compiled with -fno-stack-protector it, so we got this far without + * problems. The value of the canary is kept at offset 20 from the + * %gs register, so we need to set that up before calling C functions + * in other files. */ + setup_stack_canary_segment(0); + /* We could just call load_stack_canary_segment(), but we might as + * call switch_to_new_gdt() which loads the whole table and sets up + * the per-cpu segment descriptor register %fs as well. */ + switch_to_new_gdt(0); + /* As described in head_32.S, we map the first 128M of memory. */ max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; - /* Load the %fs segment register (the per-cpu segment register) with - * the normal data segment to get through booting. */ - asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); - /* The Host<->Guest Switcher lives at the top of our address space, and * the Host told us how big it is when we made LGUEST_INIT hypercall: * it put the answer in lguest_data.reserve_mem */ diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cbcfb40..a725b7f760ae 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_address >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; + int width = sizeof(unsigned long) * 2; /* * Now print the actual finished series */ - seq_printf(m, "0x%p-0x%p ", - (void *)st->start_address, - (void *)st->current_address); + seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); delta = (st->current_address - st->start_address) >> 10; while (!(delta & 1023) && unit[1]) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..5ec7ae366615 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -3,40 +3,16 @@ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ -#include <linux/interrupt.h> -#include <linux/mmiotrace.h> -#include <linux/bootmem.h> -#include <linux/compiler.h> -#include <linux/highmem.h> -#include <linux/kprobes.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/vt_kern.h> -#include <linux/signal.h> -#include <linux/kernel.h> -#include <linux/ptrace.h> -#include <linux/string.h> -#include <linux/module.h> -#include <linux/kdebug.h> -#include <linux/errno.h> -#include <linux/magic.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/init.h> -#include <linux/mman.h> -#include <linux/tty.h> -#include <linux/smp.h> -#include <linux/mm.h> - -#include <asm-generic/sections.h> - -#include <asm/tlbflush.h> -#include <asm/pgalloc.h> -#include <asm/segment.h> -#include <asm/system.h> -#include <asm/proto.h> -#include <asm/traps.h> -#include <asm/desc.h> +#include <linux/magic.h> /* STACK_END_MAGIC */ +#include <linux/sched.h> /* test_thread_flag(), ... */ +#include <linux/kdebug.h> /* oops_begin/end, ... */ +#include <linux/module.h> /* search_exception_table */ +#include <linux/bootmem.h> /* max_low_pfn */ +#include <linux/kprobes.h> /* __kprobes, ... */ +#include <linux/mmiotrace.h> /* kmmio_handler, ... */ + +#include <asm/traps.h> /* dotraplinkage, ... */ +#include <asm/pgalloc.h> /* pgd_*(), ... */ /* * Page fault error code bits: @@ -225,12 +201,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else { + else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - } return pmd_k; } @@ -538,8 +512,6 @@ bad: static int is_errata93(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - static int once; - if (address != regs->ip) return 0; @@ -549,10 +521,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { - if (!once) { - printk(errata93_warning); - once = 1; - } + printk_once(errata93_warning); regs->ip = address; return 1; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 8126e8d1a2a4..58f621e81919 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); return (void *)vaddr; } @@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif } - arch_flush_lazy_mmu_mode(); pagefault_enable(); } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8f307d914c2e..f46c340727b8 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -26,12 +26,16 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, unsigned long sbase = saddr & PUD_MASK; unsigned long s_end = sbase + PUD_SIZE; + /* Allow segments to share if only one is marked locked */ + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + /* * match the virtual addresses, permission and the alignment of the * page table page. */ if (pmd_index(addr) != pmd_index(saddr) || - vma->vm_flags != svma->vm_flags || + vm_flags != svm_flags || sbase < svma->vm_start || svma->vm_end < s_end) return 0; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ae4f7b5d7104..34c1bfb64f1c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1,3 +1,4 @@ +#include <linux/initrd.h> #include <linux/ioport.h> #include <linux/swap.h> @@ -10,6 +11,9 @@ #include <asm/setup.h> #include <asm/system.h> #include <asm/tlbflush.h> +#include <asm/tlb.h> + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; @@ -23,6 +27,69 @@ int direct_gbpages #endif ; +int nx_enabled; + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +static int disable_nx __cpuinitdata; + +/* + * noexec = on|off + * + * Control non-executable mappings for processes. + * + * on Enable + * off Disable + */ +static int __init noexec_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } else if (!strncmp(str, "off", 3)) { + disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", noexec_setup); +#endif + +#ifdef CONFIG_X86_PAE +static void __init set_nx(void) +{ + unsigned int v[4], l, h; + + if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { + cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); + + if ((v[3] & (1 << 20)) && !disable_nx) { + rdmsr(MSR_EFER, l, h); + l |= EFER_NX; + wrmsr(MSR_EFER, l, h); + nx_enabled = 1; + __supported_pte_mask |= _PAGE_NX; + } + } +} +#else +static inline void set_nx(void) +{ +} +#endif + +#ifdef CONFIG_X86_64 +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || disable_nx) + __supported_pte_mask &= ~_PAGE_NX; +} +#endif + static void __init find_early_table_space(unsigned long end, int use_pse, int use_gbpages) { @@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, */ #ifdef CONFIG_X86_32 start = 0x7000; - e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, - tables, PAGE_SIZE); -#else /* CONFIG_X86_64 */ +#else start = 0x8000; - e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); #endif + e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, + tables, PAGE_SIZE); if (e820_table_start == -1UL) panic("Cannot find space for the kernel page tables"); @@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, use_gbpages = direct_gbpages; #endif -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_PAE set_nx(); if (nx_enabled) printk(KERN_INFO "NX (Execute Disable) protection: active\n"); -#endif /* Enable PSE if available */ if (cpu_has_pse) @@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, set_in_cr4(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } -#endif if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559ed80f5..949708d7a481 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,12 +49,9 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> +#include <asm/page_types.h> #include <asm/init.h> -unsigned long max_low_pfn_mapped; -unsigned long max_pfn_mapped; - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); @@ -587,61 +584,9 @@ void zap_low_mappings(void) flush_tlb_all(); } -int nx_enabled; - pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); -#ifdef CONFIG_X86_PAE - -static int disable_nx __initdata; - -/* - * noexec = on|off - * - * Control non executable mappings. - * - * on Enable - * off Disable - */ -static int __init noexec_setup(char *str) -{ - if (!str || !strcmp(str, "on")) { - if (cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } - } else { - if (!strcmp(str, "off")) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } else { - return -EINVAL; - } - } - - return 0; -} -early_param("noexec", noexec_setup); - -void __init set_nx(void) -{ - unsigned int v[4], l, h; - - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } - } -} -#endif - /* user-defined highmem size */ static unsigned int highmem_pages = -1; @@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn, highstart_pfn = highend_pfn = max_pfn; if (max_pfn > max_low_pfn) highstart_pfn = max_low_pfn; - memory_present(0, 0, highend_pfn); e820_register_active_regions(0, 0, highend_pfn); + sparse_memory_present_with_active_regions(0); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else - memory_present(0, 0, max_low_pfn); e820_register_active_regions(0, 0, max_low_pfn); + sparse_memory_present_with_active_regions(0); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e8020df6..52bb9519bb86 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -50,18 +50,8 @@ #include <asm/cacheflush.h> #include <asm/init.h> -/* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long max_low_pfn_mapped; -unsigned long max_pfn_mapped; - static unsigned long dma_reserve __initdata; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int disable_nx __cpuinitdata; - -/* - * noexec=on|off - * Control non-executable mappings for 64-bit processes. - * - * on Enable (default) - * off Disable - */ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - int force_personality32; /* @@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); } +#endif void __init paging_init(void) { @@ -638,11 +596,10 @@ void __init paging_init(void) max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; max_zone_pfns[ZONE_NORMAL] = max_pfn; - memory_present(0, 0, max_pfn); + sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); free_area_init_nodes(max_zone_pfns); } -#endif /* * Memory hotplug specific functions diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 8056545e2d39..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) kpte_clear_flush(kmap_pte-idx, vaddr); - arch_flush_lazy_mmu_mode(); pagefault_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 605c8be06217..c0bedcd10f97 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -40,23 +40,23 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) static void __init memtest(u64 pattern, u64 start_phys, u64 size) { - u64 i, count; - u64 *start; + u64 *p; + void *start, *end; u64 start_bad, last_bad; u64 start_phys_aligned; size_t incr; incr = sizeof(pattern); start_phys_aligned = ALIGN(start_phys, incr); - count = (size - (start_phys_aligned - start_phys))/incr; start = __va(start_phys_aligned); + end = start + size - (start_phys_aligned - start_phys); start_bad = 0; last_bad = 0; - for (i = 0; i < count; i++) - start[i] = pattern; - for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start == pattern) + for (p = start; p < end; p++) + *p = pattern; + for (p = start; p < end; p++, start_phys_aligned += incr) { + if (*p == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2d05a12029dc..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, } /* Initialize bootmem allocator for a node */ -void __init setup_node_bootmem(int nodeid, unsigned long start, - unsigned long end) +void __init +setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; + const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); unsigned long bootmap_start, nodedata_phys; void *bootmap; - const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); int nid; if (!end) return; + /* + * Don't confuse VM with a node that doesn't have the + * minimum amount of memory: + */ + if (end && (end - start) < NODE_MIN_SIZE) + return; + start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, @@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); -#ifdef CONFIG_ACPI_NUMA - srat_reserve_add_area(nodeid); -#endif node_set_online(nodeid); } @@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void) return pages; } -void __init paging_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; - max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; - max_zone_pfns[ZONE_NORMAL] = max_pfn; - - sparse_memory_present_with_active_regions(MAX_NUMNODES); - sparse_init(); - - free_area_init_nodes(max_zone_pfns); -} - static __init int numa_setup(char *opt) { if (!opt) @@ -606,8 +595,6 @@ static __init int numa_setup(char *opt) #ifdef CONFIG_ACPI_NUMA if (!strncmp(opt, "noacpi", 6)) acpi_numa = -1; - if (!strncmp(opt, "hotadd=", 7)) - hotadd_percent = simple_strtoul(opt+7, NULL, 10); #endif return 0; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 797f9f107cb6..6ce9518fe2ac 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -153,7 +153,7 @@ static void __cpa_flush_all(void *arg) */ __flush_tlb_all(); - if (cache && boot_cpu_data.x86_model >= 4) + if (cache && boot_cpu_data.x86 >= 4) wbinvd(); } @@ -208,20 +208,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; + unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_range, NULL, 1); + on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1); - if (!cache) + if (!cache || do_wbinvd) return; - /* 4M threshold */ - if (numpages >= 1024) { - if (boot_cpu_data.x86_model >= 4) - wbinvd(); - return; - } /* * We only need to flush on one CPU, * clflush is a MESI-coherent instruction that @@ -844,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - cpa.vaddr = addr; cpa.pages = pages; cpa.numpages = numpages; @@ -895,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); - /* - * If we've been called with lazy mmu updates enabled, then - * make sure that everything gets flushed out before we - * return. - */ - arch_flush_lazy_mmu_mode(); - out: return ret; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 01765955baaf..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata; static nodemask_t cpu_nodes_parsed __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES]; -static int found_add_area __initdata; -int hotadd_percent __initdata = 0; static int num_node_memblks __initdata; static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; -/* Too small nodes confuse the VM badly. Usually they result - from BIOS bugs. */ -#define NODE_MIN_SIZE (4*1024*1024) - static __init int setup_node(int pxm) { return acpi_map_pxm_to_node(pxm); @@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) { struct bootnode *nd = &nodes[i]; - if (found_add_area) - return; - if (nd->start < start) { nd->start = start; if (nd->end < nd->start) @@ -86,7 +77,6 @@ static __init void bad_srat(void) int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; - found_add_area = 0; for (i = 0; i < MAX_LOCAL_APIC; i++) apicid_to_node[i] = NUMA_NO_NODE; for (i = 0; i < MAX_NUMNODES; i++) @@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm, apic_id, node); } -static int update_end_of_memory(unsigned long end) {return -1;} -static int hotadd_enough_memory(struct bootnode *nd) {return 1;} #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static inline int save_add_info(void) {return 1;} #else static inline int save_add_info(void) {return 0;} #endif /* - * Update nodes_add and decide if to include add are in the zone. - * Both SPARSE and RESERVE need nodes_add information. - * This code supports one contiguous hot add area per node. + * Update nodes_add[] + * This code supports one contiguous hot add area per node */ -static int __init -reserve_hotadd(int node, unsigned long start, unsigned long end) +static void __init +update_nodes_add(int node, unsigned long start, unsigned long end) { unsigned long s_pfn = start >> PAGE_SHIFT; unsigned long e_pfn = end >> PAGE_SHIFT; - int ret = 0, changed = 0; + int changed = 0; struct bootnode *nd = &nodes_add[node]; /* I had some trouble with strange memory hotadd regions breaking @@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) mistakes */ if ((signed long)(end - start) < NODE_MIN_SIZE) { printk(KERN_ERR "SRAT: Hotplug area too small\n"); - return -1; + return; } /* This check might be a bit too strict, but I'm keeping it for now. */ @@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug area %lu -> %lu has existing memory\n", s_pfn, e_pfn); - return -1; - } - - if (!hotadd_enough_memory(&nodes_add[node])) { - printk(KERN_ERR "SRAT: Hotplug area too large\n"); - return -1; + return; } /* Looks good */ @@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); } - ret = update_end_of_memory(nd->end); - if (changed) - printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); - return ret; + printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", + nd->start, nd->end); } /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ @@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) start, end); e820_register_active_regions(node, start >> PAGE_SHIFT, end >> PAGE_SHIFT); - push_node_boundaries(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && - (reserve_hotadd(node, start, end) < 0)) { - /* Ignore hotadd region. Undo damage */ - printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { + update_nodes_add(node, start, end); + /* restore nodes[node] */ *nd = oldnode; if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); @@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) pxmram = 0; } - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); - /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ - if ((long)(e820ram - pxmram) >= 1*1024*1024) { + e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); + /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ + if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { printk(KERN_ERR "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", (pxmram << PAGE_SHIFT) >> 20, @@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) return 1; } -static void __init unparse_node(int node) -{ - int i; - node_clear(node, nodes_parsed); - node_clear(node, cpu_nodes_parsed); - for (i = 0; i < MAX_LOCAL_APIC; i++) { - if (apicid_to_node[i] == node) - apicid_to_node[i] = NUMA_NO_NODE; - } -} - void __init acpi_numa_arch_fixup(void) {} /* Use the information discovered above to actually set up the nodes. */ @@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; /* First clean up the node list */ - for (i = 0; i < MAX_NUMNODES; i++) { + for (i = 0; i < MAX_NUMNODES; i++) cutoff_node(i, start, end); - /* - * don't confuse VM with a node that doesn't have the - * minimum memory. - */ - if (nodes[i].end && - (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { - unparse_node(i); - node_set_offline(i); - } - } if (!nodes_cover_memory(nodes)) { bad_srat(); @@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (node == NUMA_NO_NODE) continue; - if (!node_isset(node, node_possible_map)) + if (!node_online(node)) numa_clear_node(i); } numa_init_array(); @@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b) } #endif /* CONFIG_NUMA_EMU */ -void __init srat_reserve_add_area(int nodeid) -{ - if (found_add_area && nodes_add[nodeid].end) { - u64 total_mb; - - printk(KERN_INFO "SRAT: Reserving hot-add memory space " - "for node %d at %Lx-%Lx\n", - nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); - total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) - >> PAGE_SHIFT; - total_mb *= sizeof(struct page); - total_mb >>= 20; - printk(KERN_INFO "SRAT: This will cost you %Lu MB of " - "pre-allocated memory.\n", (unsigned long long)total_mb); - reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, - nodes_add[nodeid].end - nodes_add[nodeid].start, - BOOTMEM_DEFAULT); - } -} - int __node_distance(int a, int b) { int index; diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 04df67f8a7ba..044897be021f 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -76,9 +76,9 @@ void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { struct frame_head *head = (struct frame_head *)frame_pointer(regs); - unsigned long stack = kernel_trap_sp(regs); if (!user_mode_vm(regs)) { + unsigned long stack = kernel_stack_pointer(regs); if (depth) dump_trace(NULL, regs, (unsigned long *)stack, 0, &backtrace_ops, &depth); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a7..3b285e656e27 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -356,14 +356,11 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ -static int p4force; -module_param(p4force, int, 0); - static int __init p4_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - if (!p4force && (cpu_model > 6 || cpu_model == 5)) + if (cpu_model > 6 || cpu_model == 5) return 0; #ifndef CONFIG_SMP @@ -389,10 +386,25 @@ static int __init p4_init(char **cpu_type) return 0; } +static int force_arch_perfmon; +static int force_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "archperfmon")) { + force_arch_perfmon = 1; + printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } + + return 0; +} +module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); + static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + if (force_arch_perfmon && cpu_has_arch_perfmon) + return 0; + switch (cpu_model) { case 0 ... 2: *cpu_type = "i386/ppro"; @@ -414,6 +426,13 @@ static int __init ppro_init(char **cpu_type) case 15: case 23: *cpu_type = "i386/core_2"; break; + case 26: + arch_perfmon_setup_counters(); + *cpu_type = "i386/core_i7"; + break; + case 28: + *cpu_type = "i386/atom"; + break; default: /* Unknown */ return 0; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index fecbce6e7d7c..0696d506c4ad 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) return 0; } + if (io_apic_assign_pci_irqs) + return 0; + /* Find IRQ routing entry */ if (!pirq_table) @@ -1039,56 +1042,15 @@ static void __init pcibios_fixup_irqs(void) pirq_penalty[dev->irq]++; } + if (io_apic_assign_pci_irqs) + return; + dev = NULL; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) continue; -#ifdef CONFIG_X86_IO_APIC - /* - * Recalculate IRQ numbers if we use the I/O APIC. - */ - if (io_apic_assign_pci_irqs) { - int irq; - - /* - * interrupt pins are numbered starting from 1 - */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, - PCI_SLOT(dev->devfn), pin - 1); - /* - * Busses behind bridges are typically not listed in the - * MP-table. In this case we have to look up the IRQ - * based on the parent bus, parent slot, and pin number. - * The SMP code detects such bridged busses itself so we - * should get into this branch reliably. - */ - if (irq < 0 && dev->bus->parent) { - /* go back to the bridge */ - struct pci_dev *bridge = dev->bus->self; - int bus; - - pin = pci_swizzle_interrupt_pin(dev, pin); - bus = bridge->bus->number; - irq = IO_APIC_get_PCI_irq_vector(bus, - PCI_SLOT(bridge->devfn), pin - 1); - if (irq >= 0) - dev_warn(&dev->dev, - "using bridge %s INT %c to " - "get IRQ %d\n", - pci_name(bridge), - 'A' + pin - 1, irq); - } - if (irq >= 0) { - dev_info(&dev->dev, - "PCI->APIC IRQ transform: INT %c " - "-> IRQ %d\n", - 'A' + pin - 1, irq); - dev->irq = irq; - } - } -#endif /* * Still no IRQ? Try to lookup one... */ @@ -1183,6 +1145,19 @@ int __init pcibios_irq_init(void) pcibios_enable_irq = pirq_enable_irq; pcibios_fixup_irqs(); + + if (io_apic_assign_pci_irqs && pci_routeirq) { + struct pci_dev *dev = NULL; + /* + * PCI IRQ routing is set up by pci_enable_device(), but we + * also do it here in case there are still broken drivers that + * don't use pci_enable_device(). + */ + printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + for_each_pci_dev(dev) + pirq_enable_irq(dev); + } + return 0; } @@ -1213,16 +1188,23 @@ void pcibios_penalize_isa_irq(int irq, int active) static int pirq_enable_irq(struct pci_dev *dev) { u8 pin; - struct pci_dev *temp_dev; pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + if (pin && !pcibios_lookup_irq(dev, 1)) { char *msg = ""; + if (!io_apic_assign_pci_irqs && dev->irq) + return 0; + if (io_apic_assign_pci_irqs) { +#ifdef CONFIG_X86_IO_APIC + struct pci_dev *temp_dev; int irq; + struct io_apic_irq_attr irq_attr; - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1); + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), + pin - 1, &irq_attr); /* * Busses behind bridges are typically not listed in the MP-table. * In this case we have to look up the IRQ based on the parent bus, @@ -1235,7 +1217,8 @@ static int pirq_enable_irq(struct pci_dev *dev) pin = pci_swizzle_interrupt_pin(dev, pin); irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin - 1); + PCI_SLOT(bridge->devfn), + pin - 1, &irq_attr); if (irq >= 0) dev_warn(&dev->dev, "using bridge %s " "INT %c to get IRQ %d\n", @@ -1245,12 +1228,15 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { + io_apic_set_pci_routing(&dev->dev, irq, + &irq_attr); + dev->irq = irq; dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); - dev->irq = irq; return 0; } else msg = "; probably buggy MP table"; +#endif } else if (pci_probe & PCI_BIOS_IRQ_SCAN) msg = ""; else diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 5fa10bb9604f..8766b0e216c5 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -375,7 +375,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, if (!fixmem32) return AE_OK; if ((mcfg_res->start >= fixmem32->address) && - (mcfg_res->end <= (fixmem32->address + + (mcfg_res->end < (fixmem32->address + fixmem32->address_length))) { mcfg_res->flags = 1; return AE_CTRL_TERMINATE; @@ -392,7 +392,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res, return AE_OK; if ((mcfg_res->start >= address.minimum) && - (mcfg_res->end <= (address.minimum + address.address_length))) { + (mcfg_res->end < (address.minimum + address.address_length))) { mcfg_res->flags = 1; return AE_CTRL_TERMINATE; } @@ -418,7 +418,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) struct resource mcfg_res; mcfg_res.start = start; - mcfg_res.end = end; + mcfg_res.end = end - 1; mcfg_res.flags = 0; acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 7133cdf9098b..cac083386e03 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/init.h> #include <linux/random.h> +#include <linux/elf.h> #include <asm/vsyscall.h> #include <asm/vgtod.h> #include <asm/proto.h> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3b767d03fd6a..172438f86a02 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -9,5 +9,6 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm.o xen-asm_$(BITS).o \ grant-table.o suspend.o -obj-$(CONFIG_SMP) += smp.o spinlock.o -obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
\ No newline at end of file +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o +obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09e8c36ee80..0a1700a2be9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <linux/start_kernel.h> #include <linux/sched.h> +#include <linux/kprobes.h> #include <linux/bootmem.h> #include <linux/module.h> #include <linux/mm.h> @@ -44,6 +45,7 @@ #include <asm/processor.h> #include <asm/proto.h> #include <asm/msr-index.h> +#include <asm/traps.h> #include <asm/setup.h> #include <asm/desc.h> #include <asm/pgtable.h> @@ -240,10 +242,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -void xen_leave_lazy(void) +static void xen_end_context_switch(struct task_struct *next) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); + paravirt_end_context_switch(next); } static unsigned long xen_store_tr(void) @@ -428,11 +430,44 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { + unsigned long addr; + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; - info->address = gate_offset(*val); + + addr = gate_offset(*val); +#ifdef CONFIG_X86_64 + /* + * Look for known traps using IST, and substitute them + * appropriately. The debugger ones are the only ones we care + * about. Xen will handle faults like double_fault and + * machine_check, so we should never see them. Warn if + * there's an unexpected IST-using fault handler. + */ + if (addr == (unsigned long)debug) + addr = (unsigned long)xen_debug; + else if (addr == (unsigned long)int3) + addr = (unsigned long)xen_int3; + else if (addr == (unsigned long)stack_segment) + addr = (unsigned long)xen_stack_segment; + else if (addr == (unsigned long)double_fault || + addr == (unsigned long)nmi) { + /* Don't need to handle these */ + return 0; +#ifdef CONFIG_X86_MCE + } else if (addr == (unsigned long)machine_check) { + return 0; +#endif + } else { + /* Some other trap using IST? */ + if (WARN_ON(val->ist != 0)) + return 0; + } +#endif /* CONFIG_X86_64 */ + info->address = addr; + info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ @@ -623,10 +658,26 @@ static void xen_clts(void) xen_mc_issue(PARAVIRT_LAZY_CPU); } +static DEFINE_PER_CPU(unsigned long, xen_cr0_value); + +static unsigned long xen_read_cr0(void) +{ + unsigned long cr0 = percpu_read(xen_cr0_value); + + if (unlikely(cr0 == 0)) { + cr0 = native_read_cr0(); + percpu_write(xen_cr0_value, cr0); + } + + return cr0; +} + static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; + percpu_write(xen_cr0_value, cr0); + /* Only pay attention to cr0.TS; everything else is ignored. */ mcs = xen_mc_entry(0); @@ -812,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .clts = xen_clts, - .read_cr0 = native_read_cr0, + .read_cr0 = xen_read_cr0, .write_cr0 = xen_write_cr0, .read_cr4 = native_read_cr4, @@ -860,10 +911,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { /* Xen takes care of %gs when switching to usermode for us */ .swapgs = paravirt_nop, - .lazy_mode = { - .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy, - }, + .start_context_switch = paravirt_start_context_switch, + .end_context_switch = xen_end_context_switch, }; static const struct pv_apic_ops xen_apic_ops __initdata = { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e25a78e1113a..4ceb28581652 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,6 +42,7 @@ #include <linux/highmem.h> #include <linux/debugfs.h> #include <linux/bug.h> +#include <linux/module.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -451,10 +452,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - /* updates to init_mm may be done without lock */ - if (mm == &init_mm) - preempt_disable(); - ADD_STATS(set_pte_at, 1); // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); ADD_STATS(set_pte_at_current, mm == current->mm); @@ -475,9 +472,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, } xen_set_pte(ptep, pteval); -out: - if (mm == &init_mm) - preempt_enable(); +out: return; } pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, @@ -1151,10 +1146,8 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); - arch_flush_lazy_cpu_mode(); - } } static void xen_drop_mm_ref(struct mm_struct *mm) @@ -1167,7 +1160,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) load_cr3(swapper_pg_dir); else leave_mm(smp_processor_id()); - arch_flush_lazy_cpu_mode(); } /* Get the "official" set of cpus referring to our pagetable. */ @@ -1875,6 +1867,14 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } +static void xen_leave_lazy_mmu(void) +{ + preempt_disable(); + xen_mc_flush(); + paravirt_leave_lazy_mmu(); + preempt_enable(); +} + const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, .pagetable_setup_done = xen_pagetable_setup_done, @@ -1948,7 +1948,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, + .leave = xen_leave_lazy_mmu, }, .set_fixmap = xen_set_fixmap, diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 15c6c68db6a2..ad0047f47cd4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -61,9 +61,9 @@ char * __init xen_memory_setup(void) * - xen_start_info * See comment above "struct start_info" in <xen/interface/xen.h> */ - e820_add_region(__pa(xen_start_info->mfn_list), - xen_start_info->pt_base - xen_start_info->mfn_list, - E820_RESERVED); + reserve_early(__pa(xen_start_info->mfn_list), + __pa(xen_start_info->pt_base), + "XEN START INFO"); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 20139464943c..22494fd4c9b5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); void xen_reserve_top(void); -void xen_leave_lazy(void); void xen_post_allocator_init(void); char * __init xen_memory_setup(void); @@ -62,15 +61,26 @@ void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); -void __init xen_init_spinlocks(void); -__cpuinit void xen_init_lock_cpu(int cpu); -void xen_uninit_lock_cpu(int cpu); - extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void __init xen_init_spinlocks(void); +__cpuinit void xen_init_lock_cpu(int cpu); +void xen_uninit_lock_cpu(int cpu); +#else +static inline void xen_init_spinlocks(void) +{ +} +static inline void xen_init_lock_cpu(int cpu) +{ +} +static inline void xen_uninit_lock_cpu(int cpu) +{ +} +#endif /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/block/bsg.c b/block/bsg.c index 206060e795da..dd81be455e00 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -315,6 +315,7 @@ out: blk_put_request(rq); if (next_rq) { blk_rq_unmap_user(next_rq->bio); + next_rq->bio = NULL; blk_put_request(next_rq); } return ERR_PTR(ret); @@ -448,6 +449,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, hdr->dout_resid = rq->data_len; hdr->din_resid = rq->next_rq->data_len; blk_rq_unmap_user(bidi_bio); + rq->next_rq->bio = NULL; blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) hdr->din_resid = rq->data_len; @@ -466,6 +468,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, blk_rq_unmap_user(bio); if (rq->cmd != rq->__cmd) kfree(rq->cmd); + rq->bio = NULL; blk_put_request(rq); return ret; diff --git a/crypto/ahash.c b/crypto/ahash.c index b2d1ee32cfe8..f3476374f764 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -82,10 +82,11 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) if (err) return err; - walk->offset = 0; - - if (nbytes) + if (nbytes) { + walk->offset = 0; + walk->pg++; return hash_walk_next(walk); + } if (!walk->total) return 0; diff --git a/crypto/api.c b/crypto/api.c index 314dab96840e..fd2545decb28 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -221,7 +221,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) request_module(name); - if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) && + if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & + CRYPTO_ALG_NEED_FALLBACK) && snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) request_module(tmp); diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 2a342c8e52b3..3ca3b669d5d5 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -153,7 +153,8 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) if (err) goto out; - eseqiv_complete2(req); + if (giv != req->giv) + eseqiv_complete2(req); out: return err; diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index 17e50824a6f1..72ac28da14e3 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -5,40 +5,43 @@ ccflags-y := -Os ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT -obj-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \ +# use acpi.o to put all files here into acpi.o modparam namespace +obj-y += acpi.o + +acpi-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \ dsmethod.o dsobject.o dsutils.o dswload.o dswstate.o \ dsinit.o -obj-y += evevent.o evregion.o evsci.o evxfevnt.o \ +acpi-y += evevent.o evregion.o evsci.o evxfevnt.o \ evmisc.o evrgnini.o evxface.o evxfregn.o \ evgpe.o evgpeblk.o -obj-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\ +acpi-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\ exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\ excreate.o exmisc.o exoparg2.o exregion.o exstore.o exutils.o \ exdump.o exmutex.o exoparg3.o exresnte.o exstoren.o -obj-y += hwacpi.o hwgpe.o hwregs.o hwsleep.o hwxface.o hwvalid.o +acpi-y += hwacpi.o hwgpe.o hwregs.o hwsleep.o hwxface.o hwvalid.o -obj-$(ACPI_FUTURE_USAGE) += hwtimer.o +acpi-$(ACPI_FUTURE_USAGE) += hwtimer.o -obj-y += nsaccess.o nsload.o nssearch.o nsxfeval.o \ +acpi-y += nsaccess.o nsload.o nssearch.o nsxfeval.o \ nsalloc.o nseval.o nsnames.o nsutils.o nsxfname.o \ nsdump.o nsinit.o nsobject.o nswalk.o nsxfobj.o \ nsparse.o nspredef.o -obj-$(ACPI_FUTURE_USAGE) += nsdumpdv.o +acpi-$(ACPI_FUTURE_USAGE) += nsdumpdv.o -obj-y += psargs.o psparse.o psloop.o pstree.o pswalk.o \ +acpi-y += psargs.o psparse.o psloop.o pstree.o pswalk.o \ psopcode.o psscope.o psutils.o psxface.o -obj-y += rsaddr.o rscreate.o rsinfo.o rsio.o rslist.o rsmisc.o rsxface.o \ +acpi-y += rsaddr.o rscreate.o rsinfo.o rsio.o rslist.o rsmisc.o rsxface.o \ rscalc.o rsirq.o rsmemory.o rsutils.o -obj-$(ACPI_FUTURE_USAGE) += rsdump.o +acpi-$(ACPI_FUTURE_USAGE) += rsdump.o -obj-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o +acpi-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o -obj-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \ +acpi-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \ utcopy.o utdelete.o utglobal.o utmath.o utobject.o \ utstate.o utmutex.o utobject.o utresrc.o utlock.o diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 772ee5c4ccca..2ec394a328e9 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -787,7 +787,12 @@ struct acpi_bit_register_info { /* For control registers, both ignored and reserved bits must be preserved */ -#define ACPI_PM1_CONTROL_IGNORED_BITS 0x0201 /* Bits 9, 0(SCI_EN) */ +/* + * The ACPI spec says to ignore PM1_CTL.SCI_EN (bit 0) + * but we need to be able to write ACPI_BITREG_SCI_ENABLE directly + * as a BIOS workaround on some machines. + */ +#define ACPI_PM1_CONTROL_IGNORED_BITS 0x0200 /* Bits 9 */ #define ACPI_PM1_CONTROL_RESERVED_BITS 0xC1F8 /* Bits 14-15, 3-8 */ #define ACPI_PM1_CONTROL_PRESERVED_BITS \ (ACPI_PM1_CONTROL_IGNORED_BITS | ACPI_PM1_CONTROL_RESERVED_BITS) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index e8f7b64e92da..ae862f1798dc 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -312,7 +312,7 @@ int acpi_bus_set_power(acpi_handle handle, int state) end: if (result) printk(KERN_WARNING PREFIX - "Transitioning device [%s] to D%d\n", + "Device [%s] failed to transition to D%d\n", device->pnp.bus_id, state); else { device->power.state = state; diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c index 95650f83ce2e..bc46de3d967f 100644 --- a/drivers/acpi/pci_bind.c +++ b/drivers/acpi/pci_bind.c @@ -116,9 +116,6 @@ int acpi_pci_bind(struct acpi_device *device) struct acpi_pci_data *pdata; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; acpi_handle handle; - struct pci_dev *dev; - struct pci_bus *bus; - if (!device || !device->parent) return -EINVAL; @@ -176,20 +173,9 @@ int acpi_pci_bind(struct acpi_device *device) * Locate matching device in PCI namespace. If it doesn't exist * this typically means that the device isn't currently inserted * (e.g. docking station, port replicator, etc.). - * We cannot simply search the global pci device list, since - * PCI devices are added to the global pci list when the root - * bridge start ops are run, which may not have happened yet. */ - bus = pci_find_bus(data->id.segment, data->id.bus); - if (bus) { - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->devfn == PCI_DEVFN(data->id.device, - data->id.function)) { - data->dev = dev; - break; - } - } - } + data->dev = pci_get_slot(pdata->bus, + PCI_DEVFN(data->id.device, data->id.function)); if (!data->dev) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device %04x:%02x:%02x.%d not present in PCI namespace\n", @@ -259,9 +245,10 @@ int acpi_pci_bind(struct acpi_device *device) end: kfree(buffer.pointer); - if (result) + if (result) { + pci_dev_put(data->dev); kfree(data); - + } return result; } @@ -303,6 +290,7 @@ static int acpi_pci_unbind(struct acpi_device *device) if (data->dev->subordinate) { acpi_pci_irq_del_prt(data->id.segment, data->bus->number); } + pci_dev_put(data->dev); kfree(data); end: diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 51b9f8280f88..2faa9e2ac893 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); - acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, + acpi_register_gsi(&dev->dev, dev->irq, + ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); return 0; } else { @@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) } } - rc = acpi_register_gsi(gsi, triggering, polarity); + rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (rc < 0) { dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", pin_name(pin)); diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 45ad3288c5ff..23f0fb84f1c1 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -844,7 +844,7 @@ static int acpi_processor_add(struct acpi_device *device) if (!pr) return -ENOMEM; - if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { kfree(pr); return -ENOMEM; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index f7ca8c55956b..10a2d913635a 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -148,6 +148,9 @@ static void acpi_timer_check_state(int state, struct acpi_processor *pr, if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) return; + if (boot_cpu_has(X86_FEATURE_AMDC1E)) + type = ACPI_STATE_C1; + /* * Check, if one of the previous states already marked the lapic * unstable @@ -202,21 +205,44 @@ static void acpi_state_timer_broadcast(struct acpi_processor *pr, * Suspend / resume control */ static int acpi_idle_suspend; +static u32 saved_bm_rld; + +static void acpi_idle_bm_rld_save(void) +{ + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &saved_bm_rld); +} +static void acpi_idle_bm_rld_restore(void) +{ + u32 resumed_bm_rld; + + acpi_read_bit_register(ACPI_BITREG_BUS_MASTER_RLD, &resumed_bm_rld); + + if (resumed_bm_rld != saved_bm_rld) + acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, saved_bm_rld); +} int acpi_processor_suspend(struct acpi_device * device, pm_message_t state) { + if (acpi_idle_suspend == 1) + return 0; + + acpi_idle_bm_rld_save(); acpi_idle_suspend = 1; return 0; } int acpi_processor_resume(struct acpi_device * device) { + if (acpi_idle_suspend == 0) + return 0; + + acpi_idle_bm_rld_restore(); acpi_idle_suspend = 0; return 0; } #if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) -static int tsc_halts_in_c(int state) +static void tsc_check_state(int state) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: @@ -226,13 +252,17 @@ static int tsc_halts_in_c(int state) * C/P/S0/S1 states when this bit is set. */ if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - return 0; + return; /*FALL THROUGH*/ default: - return state > ACPI_STATE_C1; + /* TSC could halt in idle, so notify users */ + if (state > ACPI_STATE_C1) + mark_tsc_unstable("TSC halts in idle"); } } +#else +static void tsc_check_state(int state) { return; } #endif static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) @@ -578,17 +608,13 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) pr->power.timer_broadcast_on_state = INT_MAX; - for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { + for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { struct acpi_processor_cx *cx = &pr->power.states[i]; -#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) - /* TSC could halt in idle, so notify users */ - if (tsc_halts_in_c(cx->type)) - mark_tsc_unstable("TSC halts in idle");; -#endif switch (cx->type) { case ACPI_STATE_C1: cx->valid = 1; + acpi_timer_check_state(i, pr, cx); break; case ACPI_STATE_C2: @@ -603,6 +629,8 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) acpi_timer_check_state(i, pr, cx); break; } + if (cx->valid) + tsc_check_state(cx->type); if (cx->valid) working++; @@ -806,11 +834,12 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, /* Do not access any ACPI IO ports in suspend path */ if (acpi_idle_suspend) { - acpi_safe_halt(); local_irq_enable(); + cpu_relax(); return 0; } + acpi_state_timer_broadcast(pr, cx, 1); kt1 = ktime_get_real(); acpi_idle_do_entry(cx); kt2 = ktime_get_real(); @@ -818,6 +847,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, local_irq_enable(); cx->usage++; + acpi_state_timer_broadcast(pr, cx, 0); return idle_time; } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index cafb41000f6b..60e543d3234e 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -309,9 +309,15 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) (u32) px->bus_master_latency, (u32) px->control, (u32) px->status)); - if (!px->core_frequency) { - printk(KERN_ERR PREFIX - "Invalid _PSS data: freq is zero\n"); + /* + * Check that ACPI's u64 MHz will be valid as u32 KHz in cpufreq + */ + if (!px->core_frequency || + ((u32)(px->core_frequency * 1000) != + (px->core_frequency * 1000))) { + printk(KERN_ERR FW_BUG PREFIX + "Invalid BIOS _PSS frequency: 0x%llx MHz\n", + px->core_frequency); result = -EFAULT; kfree(pr->performance->states); goto end; diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index d0d1f4d50434..227543789ba9 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -45,6 +45,14 @@ #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); +/* ignore_tpc: + * 0 -> acpi processor driver doesn't ignore _TPC values + * 1 -> acpi processor driver ignores _TPC values + */ +static int ignore_tpc; +module_param(ignore_tpc, int, 0644); +MODULE_PARM_DESC(ignore_tpc, "Disable broken BIOS _TPC throttling support"); + struct throttling_tstate { unsigned int cpu; /* cpu nr */ int target_state; /* target T-state */ @@ -283,6 +291,10 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) if (!pr) return -EINVAL; + + if (ignore_tpc) + goto end; + status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc); if (ACPI_FAILURE(status)) { if (status != AE_NOT_FOUND) { @@ -290,6 +302,8 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr) } return -ENODEV; } + +end: pr->throttling_platform_limit = (int)tpc; return 0; } @@ -302,6 +316,9 @@ int acpi_processor_tstate_has_changed(struct acpi_processor *pr) struct acpi_processor_limit *limit; int target_state; + if (ignore_tpc) + return 0; + result = acpi_processor_get_platform_limit(pr); if (result) { /* Throttling Limit is unsupported */ @@ -821,6 +838,14 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) ret = acpi_read_throttling_status(pr, &value); if (ret >= 0) { state = acpi_get_throttling_state(pr, value); + if (state == -1) { + ACPI_WARNING((AE_INFO, + "Invalid throttling state, reset")); + state = 0; + ret = acpi_processor_set_throttling(pr, state); + if (ret) + return ret; + } pr->throttling.state = state; } diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index d7ff61c0d571..1bdfb37377e3 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -538,6 +538,57 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level) return -EINVAL; } +/* + * For some buggy _BQC methods, we need to add a constant value to + * the _BQC return value to get the actual current brightness level + */ + +static int bqc_offset_aml_bug_workaround; +static int __init video_set_bqc_offset(const struct dmi_system_id *d) +{ + bqc_offset_aml_bug_workaround = 9; + return 0; +} + +static struct dmi_system_id video_dmi_table[] __initdata = { + /* + * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121 + */ + { + .callback = video_set_bqc_offset, + .ident = "Acer Aspire 5720", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5720"), + }, + }, + { + .callback = video_set_bqc_offset, + .ident = "Acer Aspire 5710Z", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710Z"), + }, + }, + { + .callback = video_set_bqc_offset, + .ident = "eMachines E510", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "EMACHINES"), + DMI_MATCH(DMI_PRODUCT_NAME, "eMachines E510"), + }, + }, + { + .callback = video_set_bqc_offset, + .ident = "Acer Aspire 5315", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5315"), + }, + }, + {} +}; + static int acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, unsigned long long *level) @@ -557,6 +608,7 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device, *level = device->brightness->levels[*level + 2]; } + *level += bqc_offset_aml_bug_workaround; device->brightness->curr = *level; return 0; } else { @@ -2290,13 +2342,15 @@ EXPORT_SYMBOL(acpi_video_register); static int __init acpi_video_init(void) { + dmi_check_system(video_dmi_table); + if (intel_opregion_present()) return 0; return acpi_video_register(); } -void __exit acpi_video_exit(void) +void acpi_video_exit(void) { acpi_bus_unregister_driver(&acpi_video_bus); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 08186ecbaf8d..6b91c26a4635 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -220,6 +220,7 @@ enum { AHCI_HFLAG_NO_HOTPLUG = (1 << 7), /* ignore PxSERR.DIAG.N */ AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ + AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ /* ap->flags bits */ @@ -2316,9 +2317,17 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg) static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg) { struct ata_host *host = dev_get_drvdata(&pdev->dev); + struct ahci_host_priv *hpriv = host->private_data; void __iomem *mmio = host->iomap[AHCI_PCI_BAR]; u32 ctl; + if (mesg.event & PM_EVENT_SUSPEND && + hpriv->flags & AHCI_HFLAG_NO_SUSPEND) { + dev_printk(KERN_ERR, &pdev->dev, + "BIOS update required for suspend/resume\n"); + return -EIO; + } + if (mesg.event & PM_EVENT_SLEEP) { /* AHCI spec rev1.1 section 8.3.3: * Software must disable interrupts prior to requesting a @@ -2610,6 +2619,63 @@ static bool ahci_broken_system_poweroff(struct pci_dev *pdev) return false; } +static bool ahci_broken_suspend(struct pci_dev *pdev) +{ + static const struct dmi_system_id sysids[] = { + /* + * On HP dv[4-6] and HDX18 with earlier BIOSen, link + * to the harddisk doesn't become online after + * resuming from STR. Warn and fail suspend. + */ + { + .ident = "dv4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, + "HP Pavilion dv4 Notebook PC"), + }, + .driver_data = "F.30", /* cutoff BIOS version */ + }, + { + .ident = "dv5", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, + "HP Pavilion dv5 Notebook PC"), + }, + .driver_data = "F.16", /* cutoff BIOS version */ + }, + { + .ident = "dv6", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, + "HP Pavilion dv6 Notebook PC"), + }, + .driver_data = "F.21", /* cutoff BIOS version */ + }, + { + .ident = "HDX18", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, + "HP HDX18 Notebook PC"), + }, + .driver_data = "F.23", /* cutoff BIOS version */ + }, + { } /* terminate list */ + }; + const struct dmi_system_id *dmi = dmi_first_match(sysids); + const char *ver; + + if (!dmi || pdev->bus->number || pdev->devfn != PCI_DEVFN(0x1f, 2)) + return false; + + ver = dmi_get_system_info(DMI_BIOS_VERSION); + + return !ver || strcmp(ver, dmi->driver_data) < 0; +} + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version; @@ -2715,6 +2781,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "quirky BIOS, skipping spindown on poweroff\n"); } + if (ahci_broken_suspend(pdev)) { + hpriv->flags |= AHCI_HFLAG_NO_SUSPEND; + dev_printk(KERN_WARNING, &pdev->dev, + "BIOS update required for suspend/resume\n"); + } + /* CAP.NP sometimes indicate the index of the last enabled * port, at other times, that of the last possible port, so * determining the maximum port number requires looking at diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index d51a17c0f59b..1aeb7082b0c4 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -1455,6 +1455,15 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev) /* PCI slot number of the controller */ .driver_data = (void *)0x1FUL, }, + { + .ident = "HP Compaq nc6000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6000"), + }, + /* PCI slot number of the controller */ + .driver_data = (void *)0x1FUL, + }, { } /* terminate list */ }; diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c index 751b7ea4816c..fc9c5d6d7d80 100644 --- a/drivers/ata/pata_ali.c +++ b/drivers/ata/pata_ali.c @@ -497,14 +497,16 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; /* Revision 0x20 added DMA */ static const struct ata_port_info info_20 = { - .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 | + ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .port_ops = &ali_20_port_ops }; /* Revision 0x20 with support logic added UDMA */ static const struct ata_port_info info_20_udma = { - .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 | + ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA2, @@ -512,7 +514,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; /* Revision 0xC2 adds UDMA66 */ static const struct ata_port_info info_c2 = { - .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 | + ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA4, @@ -520,7 +523,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; /* Revision 0xC3 is UDMA66 for now */ static const struct ata_port_info info_c3 = { - .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 | + ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA4, @@ -528,7 +532,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; /* Revision 0xC4 is UDMA100 */ static const struct ata_port_info info_c4 = { - .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 | + ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA5, @@ -536,7 +541,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; /* Revision 0xC5 is UDMA133 with LBA48 DMA */ static const struct ata_port_info info_c5 = { - .flags = ATA_FLAG_SLAVE_POSS, + .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_IGN_SIMPLEX, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c index 2085e0a3a05a..2a6412f5d117 100644 --- a/drivers/ata/pata_efar.c +++ b/drivers/ata/pata_efar.c @@ -22,7 +22,7 @@ #include <linux/ata.h> #define DRV_NAME "pata_efar" -#define DRV_VERSION "0.4.4" +#define DRV_VERSION "0.4.5" /** * efar_pre_reset - Enable bits @@ -98,18 +98,17 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev) { 2, 1 }, { 2, 3 }, }; - if (pio > 2) - control |= 1; /* TIME1 enable */ + if (pio > 1) + control |= 1; /* TIME */ if (ata_pio_need_iordy(adev)) /* PIO 3/4 require IORDY */ - control |= 2; /* IE enable */ - /* Intel specifies that the PPE functionality is for disk only */ + control |= 2; /* IE */ + /* Intel specifies that the prefetch/posting is for disk only */ if (adev->class == ATA_DEV_ATA) - control |= 4; /* PPE enable */ + control |= 4; /* PPE */ pci_read_config_word(dev, idetm_port, &idetm_data); - /* Enable PPE, IE and TIME as appropriate */ - + /* Set PPE, IE, and TIME as appropriate */ if (adev->devno == 0) { idetm_data &= 0xCCF0; idetm_data |= control; @@ -129,7 +128,7 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev) pci_write_config_byte(dev, 0x44, slave_data); } - idetm_data |= 0x4000; /* Ensure SITRE is enabled */ + idetm_data |= 0x4000; /* Ensure SITRE is set */ pci_write_config_word(dev, idetm_port, idetm_data); } diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index f72c6c5b820f..6932e56d179c 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -48,6 +48,7 @@ * */ +#include <linux/async.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> @@ -1028,6 +1029,7 @@ static __init int legacy_init_one(struct legacy_probe *probe) &legacy_sht); if (ret) goto fail; + async_synchronize_full(); ld->platform_dev = pdev; /* Nothing found means we drop the port as its probably not there */ diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c index bdb236957cb9..f0d52f72f5bb 100644 --- a/drivers/ata/pata_netcell.c +++ b/drivers/ata/pata_netcell.c @@ -20,13 +20,24 @@ /* No PIO or DMA methods needed for this device */ +static unsigned int netcell_read_id(struct ata_device *adev, + struct ata_taskfile *tf, u16 *id) +{ + unsigned int err_mask = ata_do_dev_read_id(adev, tf, id); + /* Firmware forgets to mark words 85-87 valid */ + if (err_mask == 0) + id[ATA_ID_CSF_DEFAULT] |= 0x4000; + return err_mask; +} + static struct scsi_host_template netcell_sht = { ATA_BMDMA_SHT(DRV_NAME), }; static struct ata_port_operations netcell_ops = { .inherits = &ata_bmdma_port_ops, - .cable_detect = ata_cable_80wire, + .cable_detect = ata_cable_80wire, + .read_id = netcell_read_id, }; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index dc030f1f00f1..c6599618523e 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -700,8 +700,10 @@ int bus_add_driver(struct device_driver *drv) } kobject_uevent(&priv->kobj, KOBJ_ADD); - return error; + return 0; out_unregister: + kfree(drv->p); + drv->p = NULL; kobject_put(&priv->kobj); out_put_bus: bus_put(bus); diff --git a/drivers/base/core.c b/drivers/base/core.c index 4aa527b8a913..1977d4beb89e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -879,7 +879,7 @@ int device_add(struct device *dev) } if (!dev_name(dev)) - goto done; + goto name_error; pr_debug("device: '%s': %s\n", dev_name(dev), __func__); @@ -978,6 +978,9 @@ done: cleanup_device_parent(dev); if (parent) put_device(parent); +name_error: + kfree(dev->p); + dev->p = NULL; goto done; } diff --git a/drivers/base/driver.c b/drivers/base/driver.c index c51f11bb29ae..8ae0f63602e0 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -257,6 +257,10 @@ EXPORT_SYMBOL_GPL(driver_register); */ void driver_unregister(struct device_driver *drv) { + if (!drv || !drv->p) { + WARN(1, "Unexpected driver unregister!\n"); + return; + } driver_remove_groups(drv, drv->groups); bus_remove_driver(drv); } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 69b4ddb7de3b..3e4bc699bc0f 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -357,6 +357,7 @@ static void dpm_power_up(pm_message_t state) { struct device *dev; + mutex_lock(&dpm_list_mtx); list_for_each_entry(dev, &dpm_list, power.entry) if (dev->power.status > DPM_OFF) { int error; @@ -366,6 +367,7 @@ static void dpm_power_up(pm_message_t state) if (error) pm_dev_err(dev, state, " early", error); } + mutex_unlock(&dpm_list_mtx); } /** @@ -614,6 +616,7 @@ int device_power_down(pm_message_t state) int error = 0; suspend_device_irqs(); + mutex_lock(&dpm_list_mtx); list_for_each_entry_reverse(dev, &dpm_list, power.entry) { error = suspend_device_noirq(dev, state); if (error) { @@ -622,6 +625,7 @@ int device_power_down(pm_message_t state) } dev->power.status = DPM_OFF_IRQ; } + mutex_unlock(&dpm_list_mtx); if (error) device_power_up(resume_event(state)); return error; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8f905089b72b..a6cbf7b808e6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -934,8 +934,6 @@ static void blkfront_closing(struct xenbus_device *dev) spin_lock_irqsave(&blkif_io_lock, flags); - del_gendisk(info->gd); - /* No more blkif_request(). */ blk_stop_queue(info->rq); @@ -949,6 +947,8 @@ static void blkfront_closing(struct xenbus_device *dev) blk_cleanup_queue(info->rq); info->rq = NULL; + del_gendisk(info->gd); + out: xenbus_frontend_closed(dev); } @@ -977,8 +977,10 @@ static void backend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - if (info->gd == NULL) - xenbus_dev_fatal(dev, -ENODEV, "gd is NULL"); + if (info->gd == NULL) { + xenbus_frontend_closed(dev); + break; + } bd = bdget_disk(info->gd, 0); if (bd == NULL) xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 13929356135c..9b1624e0ddeb 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -587,7 +587,7 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) struct device_node *node = vdev->dev.archdata.of_node; deviceno = vdev->unit_address; - if (deviceno > VIOCD_MAX_CD) + if (deviceno >= VIOCD_MAX_CD) return -ENODEV; if (!node) return -ENODEV; diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 340ba4f9dc54..4a9f3492b921 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp) break; } - gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE, + gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); if (gsi > 0) break; @@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) irqp = &res->data.extended_irq; for (i = 0; i < irqp->interrupt_count; i++) { - irq = acpi_register_gsi(irqp->interrupts[i], + irq = acpi_register_gsi(NULL, irqp->interrupts[i], irqp->triggering, irqp->polarity); if (irq < 0) return AE_ERROR; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index aa83a0865ec1..09050797c76a 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2856,6 +2856,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, /* Assume a single IPMB channel at zero. */ intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB; intf->channels[0].protocol = IPMI_CHANNEL_PROTOCOL_IPMB; + intf->curr_channel = IPMI_MAX_CHANNELS; } if (rv == 0) @@ -3648,13 +3649,13 @@ static int handle_new_recv_msg(ipmi_smi_t intf, } /* - ** We need to make sure the channels have been initialized. - ** The channel_handler routine will set the "curr_channel" - ** equal to or greater than IPMI_MAX_CHANNELS when all the - ** channels for this interface have been initialized. - */ + * We need to make sure the channels have been initialized. + * The channel_handler routine will set the "curr_channel" + * equal to or greater than IPMI_MAX_CHANNELS when all the + * channels for this interface have been initialized. + */ if (intf->curr_channel < IPMI_MAX_CHANNELS) { - requeue = 1; /* Just put the message back for now */ + requeue = 0; /* Throw the message away */ goto out; } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 8f05c38c2f06..f96d0bef855e 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -694,6 +694,8 @@ static ssize_t read_zero(struct file * file, char __user * buf, written += chunk - unwritten; if (unwritten) break; + if (signal_pending(current)) + return written ? written : -ERESTARTSYS; buf += chunk; count -= chunk; cond_resched(); diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index a420e8d437dd..13f8871e5b21 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -2711,7 +2711,7 @@ static int __init mxser_module_init(void) continue; brd = &mxser_boards[m]; - retval = mxser_get_ISA_conf(!ioaddr[b], brd); + retval = mxser_get_ISA_conf(ioaddr[b], brd); if (retval <= 0) { brd->info = NULL; continue; diff --git a/drivers/char/random.c b/drivers/char/random.c index b2ced39d76b2..8c7444857a4b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1673,7 +1673,7 @@ unsigned int get_random_int(void) int ret; keyptr = get_keyptr(); - hash[0] += current->pid + jiffies + get_cycles() + (int)(long)&ret; + hash[0] += current->pid + jiffies + get_cycles(); ret = half_md4_transform(hash, keyptr->secret); put_cpu_var(get_random_int_hash); diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c index ed306eb1057f..0c2f55a38b95 100644 --- a/drivers/char/tpm/tpm_bios.c +++ b/drivers/char/tpm/tpm_bios.c @@ -212,7 +212,8 @@ static int get_event_name(char *dest, struct tcpa_event *event, unsigned char * event_entry) { const char *name = ""; - char data[40] = ""; + /* 41 so there is room for 40 data and 1 nul */ + char data[41] = ""; int i, n_len = 0, d_len = 0; struct tcpa_pc_event *pc_event; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d270e8eb3e67..6e2ec0b18948 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -808,7 +808,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) ret = -ENOMEM; goto nomem_out; } - if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { free_cpumask_var(policy->cpus); kfree(policy); ret = -ENOMEM; @@ -1070,11 +1070,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) spin_unlock_irqrestore(&cpufreq_driver_lock, flags); #endif + unlock_policy_rwsem_write(cpu); + if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); - unlock_policy_rwsem_write(cpu); - kobject_put(&data->kobj); /* we need to make sure that the underlying kobj is actually diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 2ecd95e4ab1a..7a74d175287b 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -91,6 +91,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki + * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it + * would deadlock with cancel_delayed_work_sync(), which is needed for proper + * raceless workqueue teardown. */ static DEFINE_MUTEX(dbs_mutex); @@ -542,7 +545,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { dbs_info->enable = 0; - cancel_delayed_work(&dbs_info->work); + cancel_delayed_work_sync(&dbs_info->work); } static int cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 338f428a15b7..e741c339df76 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -98,6 +98,9 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */ * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock * is recursive for the same process. -Venki + * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it + * would deadlock with cancel_delayed_work_sync(), which is needed for proper + * raceless workqueue teardown. */ static DEFINE_MUTEX(dbs_mutex); @@ -562,7 +565,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) { dbs_info->enable = 0; - cancel_delayed_work(&dbs_info->work); + cancel_delayed_work_sync(&dbs_info->work); } static int cpufreq_governor_dbs(struct cpufreq_policy *policy, diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index f9f05d7a707d..6c6656d3b1e2 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -415,6 +415,7 @@ static void crypto_done_action(unsigned long arg) static int init_ixp_crypto(void) { int ret = -ENODEV; + u32 msg[2] = { 0, 0 }; if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH | IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) { @@ -426,9 +427,35 @@ static int init_ixp_crypto(void) return ret; if (!npe_running(npe_c)) { - npe_load_firmware(npe_c, npe_name(npe_c), dev); + ret = npe_load_firmware(npe_c, npe_name(npe_c), dev); + if (ret) { + return ret; + } + if (npe_recv_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; + } else { + if (npe_send_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; + + if (npe_recv_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; } + switch ((msg[1]>>16) & 0xff) { + case 3: + printk(KERN_WARNING "Firmware of %s lacks AES support\n", + npe_name(npe_c)); + support_aes = 0; + break; + case 4: + case 5: + support_aes = 1; + break; + default: + printk(KERN_ERR "Firmware of %s lacks crypto support\n", + npe_name(npe_c)); + return -ENODEV; + } /* buffer_pool will also be used to sometimes store the hmac, * so assure it is large enough */ @@ -459,6 +486,10 @@ static int init_ixp_crypto(void) qmgr_enable_irq(RECV_QID); return 0; + +npe_error: + printk(KERN_ERR "%s not responding\n", npe_name(npe_c)); + ret = -EIO; err: if (ctx_pool) dma_pool_destroy(ctx_pool); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 3f0fdd18255d..856b3cc25583 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS("aes"); diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index da8a8ed9e411..f18d1bde0439 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -179,9 +179,14 @@ static void dma_halt(struct fsl_dma_chan *fsl_chan) static void set_ld_eol(struct fsl_dma_chan *fsl_chan, struct fsl_desc_sw *desc) { + u64 snoop_bits; + + snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan, - DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL, - 64); + DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL + | snoop_bits, 64); } static void append_ld_queue(struct fsl_dma_chan *fsl_chan, @@ -313,8 +318,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) { - struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); + struct fsl_desc_sw *desc; unsigned long flags; dma_cookie_t cookie; @@ -322,14 +327,17 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&fsl_chan->desc_lock, flags); cookie = fsl_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - desc->async_tx.cookie = cookie; - fsl_chan->common.cookie = desc->async_tx.cookie; + list_for_each_entry(desc, &tx->tx_list, node) { + cookie++; + if (cookie < 0) + cookie = 1; - append_ld_queue(fsl_chan, desc); - list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev); + desc->async_tx.cookie = cookie; + } + + fsl_chan->common.cookie = cookie; + append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); + list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); @@ -454,8 +462,8 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( { struct fsl_dma_chan *fsl_chan; struct fsl_desc_sw *first = NULL, *prev = NULL, *new; + struct list_head *list; size_t copy; - LIST_HEAD(link_chain); if (!chan) return NULL; @@ -472,7 +480,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( if (!new) { dev_err(fsl_chan->dev, "No free memory for link descriptor\n"); - return NULL; + goto fail; } #ifdef FSL_DMA_LD_DEBUG dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); @@ -507,7 +515,19 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( /* Set End-of-link to the last link descriptor of new list*/ set_ld_eol(fsl_chan, new); - return first ? &first->async_tx : NULL; + return &first->async_tx; + +fail: + if (!first) + return NULL; + + list = &first->async_tx.tx_list; + list_for_each_entry_safe_reverse(new, prev, list, node) { + list_del(&new->node); + dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); + } + + return NULL; } /** @@ -598,15 +618,16 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan) dma_addr_t next_dest_addr; unsigned long flags; + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + if (!dma_is_idle(fsl_chan)) - return; + goto out_unlock; dma_halt(fsl_chan); /* If there are some link descriptors * not transfered in queue. We need to start it. */ - spin_lock_irqsave(&fsl_chan->desc_lock, flags); /* Find the first un-transfer desciptor */ for (ld_node = fsl_chan->ld_queue.next; @@ -617,19 +638,20 @@ static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan) fsl_chan->common.cookie) == DMA_SUCCESS); ld_node = ld_node->next); - spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); - if (ld_node != &fsl_chan->ld_queue) { /* Get the ld start address from ld_queue */ next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys; - dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n", - (void *)next_dest_addr); + dev_dbg(fsl_chan->dev, "xfer LDs staring from 0x%llx\n", + (unsigned long long)next_dest_addr); set_cdar(fsl_chan, next_dest_addr); dma_start(fsl_chan); } else { set_cdar(fsl_chan, 0); set_ndar(fsl_chan, 0); } + +out_unlock: + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); } /** @@ -734,8 +756,9 @@ static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data) */ if (stat & FSL_DMA_SR_EOSI) { dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n"); - dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n", - (void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan)); + dev_dbg(fsl_chan->dev, "event: clndar 0x%llx, nlndar 0x%llx\n", + (unsigned long long)get_cdar(fsl_chan), + (unsigned long long)get_ndar(fsl_chan)); stat &= ~FSL_DMA_SR_EOSI; update_cookie = 1; } @@ -830,7 +853,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1); new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7; - if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) { + if (new_fsl_chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) { dev_err(fdev->dev, "There is no %d channel!\n", new_fsl_chan->id); err = -EINVAL; @@ -925,8 +948,8 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, } dev_info(&dev->dev, "Probe the Freescale DMA driver for %s " - "controller at %p...\n", - match->compatible, (void *)fdev->reg.start); + "controller at 0x%llx...\n", + match->compatible, (unsigned long long)fdev->reg.start); fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end - fdev->reg.start + 1); diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index 1955ee8d6d20..a600fc0f7962 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -173,7 +173,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL) == 0) { + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { device->common.chancnt--; } #endif diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index e5f5c5a8ba6c..956982f8739b 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -192,16 +192,20 @@ config EDAC_PPC4XX config EDAC_AMD8131 tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on EDAC_MM_EDAC && PCI + depends on EDAC_MM_EDAC && PCI && PPC_MAPLE help Support for error detection and correction on the AMD8131 HyperTransport PCI-X Tunnel chip. + Note, add more Kconfig dependency if it's adopted + on some machine other than Maple. config EDAC_AMD8111 tristate "AMD8111 HyperTransport I/O Hub" - depends on EDAC_MM_EDAC && PCI + depends on EDAC_MM_EDAC && PCI && PPC_MAPLE help Support for error detection and correction on the AMD8111 HyperTransport I/O Hub chip. + Note, add more Kconfig dependency if it's adopted + on some machine other than Maple. endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index a5fdcf02f591..59076819135d 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -35,3 +35,5 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o +obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o +obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index 614692181120..2cb58ef743e0 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -389,7 +389,7 @@ static int amd8111_dev_probe(struct pci_dev *dev, dev_info->edac_dev->dev = &dev_info->dev->dev; dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id; + dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); if (edac_op_state == EDAC_OPSTATE_POLL) dev_info->edac_dev->edac_check = dev_info->check; @@ -473,7 +473,7 @@ static int amd8111_pci_probe(struct pci_dev *dev, pci_info->edac_dev->dev = &pci_info->dev->dev; pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; pci_info->edac_dev->ctl_name = pci_info->ctl_name; - pci_info->edac_dev->dev_name = pci_info->dev->dev.bus_id; + pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev); if (edac_op_state == EDAC_OPSTATE_POLL) pci_info->edac_dev->edac_check = pci_info->check; diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c index c083b31cac5a..b432d60c622a 100644 --- a/drivers/edac/amd8131_edac.c +++ b/drivers/edac/amd8131_edac.c @@ -287,7 +287,7 @@ static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_info->edac_dev->dev = &dev_info->dev->dev; dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR; dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_info->dev->dev.bus_id; + dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); if (edac_op_state == EDAC_OPSTATE_POLL) dev_info->edac_dev->edac_check = amd8131_chipset.check; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 17b24c580c09..f5d46e7199d4 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -67,12 +67,18 @@ config DRM_I830 will load the correct one. config DRM_I915 + tristate "i915 driver" select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT select FB select FRAMEBUFFER_CONSOLE if !EMBEDDED - tristate "i915 driver" + # i915 depends on ACPI_VIDEO when ACPI is enabled + # but for select to work, need to select ACPI_VIDEO's dependencies, ick + select VIDEO_OUTPUT_CONTROL if ACPI + select BACKLIGHT_CLASS_DEVICE if ACPI + select INPUT if ACPI + select ACPI_VIDEO if ACPI help Choose this option if you have a system that has Intel 830M, 845G, 852GM, 855GM 865G or 915G integrated graphics. If M is selected, the diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index 6d80d17f1e96..0411d912d82a 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -170,6 +170,14 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset, } DRM_DEBUG("offset = 0x%08llx, size = 0x%08lx, type = %d\n", (unsigned long long)map->offset, map->size, map->type); + + /* page-align _DRM_SHM maps. They are allocated here so there is no security + * hole created by that and it works around various broken drivers that use + * a non-aligned quantity to map the SAREA. --BenH + */ + if (map->type == _DRM_SHM) + map->size = PAGE_ALIGN(map->size); + if ((map->offset & (~(resource_size_t)PAGE_MASK)) || (map->size & (~PAGE_MASK))) { drm_free(map, sizeof(*map), DRM_MEM_MAPS); return -EINVAL; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 94a768871734..8fab7890a363 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2294,7 +2294,12 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, } } - if (connector->funcs->set_property) + /* Do DPMS ourselves */ + if (property == connector->dev->mode_config.dpms_property) { + if (connector->funcs->dpms) + (*connector->funcs->dpms)(connector, (int) out_resp->value); + ret = 0; + } else if (connector->funcs->set_property) ret = connector->funcs->set_property(connector, property, out_resp->value); /* store the property value if succesful */ diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 45890447feec..a6f73f1e99d9 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -199,6 +199,29 @@ static void drm_helper_add_std_modes(struct drm_device *dev, } /** + * drm_helper_encoder_in_use - check if a given encoder is in use + * @encoder: encoder to check + * + * LOCKING: + * Caller must hold mode config lock. + * + * Walk @encoders's DRM device's mode_config and see if it's in use. + * + * RETURNS: + * True if @encoder is part of the mode_config, false otherwise. + */ +bool drm_helper_encoder_in_use(struct drm_encoder *encoder) +{ + struct drm_connector *connector; + struct drm_device *dev = encoder->dev; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) + if (connector->encoder == encoder) + return true; + return false; +} +EXPORT_SYMBOL(drm_helper_encoder_in_use); + +/** * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config * @crtc: CRTC to check * @@ -216,7 +239,7 @@ bool drm_helper_crtc_in_use(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; /* FIXME: Locking around list access? */ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) - if (encoder->crtc == crtc) + if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder)) return true; return false; } @@ -240,7 +263,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev) list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { encoder_funcs = encoder->helper_private; - if (!encoder->crtc) + if (!drm_helper_encoder_in_use(encoder)) (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); } @@ -935,6 +958,88 @@ bool drm_helper_initial_config(struct drm_device *dev) } EXPORT_SYMBOL(drm_helper_initial_config); +static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder) +{ + int dpms = DRM_MODE_DPMS_OFF; + struct drm_connector *connector; + struct drm_device *dev = encoder->dev; + + list_for_each_entry(connector, &dev->mode_config.connector_list, head) + if (connector->encoder == encoder) + if (connector->dpms < dpms) + dpms = connector->dpms; + return dpms; +} + +static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc) +{ + int dpms = DRM_MODE_DPMS_OFF; + struct drm_connector *connector; + struct drm_device *dev = crtc->dev; + + list_for_each_entry(connector, &dev->mode_config.connector_list, head) + if (connector->encoder && connector->encoder->crtc == crtc) + if (connector->dpms < dpms) + dpms = connector->dpms; + return dpms; +} + +/** + * drm_helper_connector_dpms + * @connector affected connector + * @mode DPMS mode + * + * Calls the low-level connector DPMS function, then + * calls appropriate encoder and crtc DPMS functions as well + */ +void drm_helper_connector_dpms(struct drm_connector *connector, int mode) +{ + struct drm_encoder *encoder = connector->encoder; + struct drm_crtc *crtc = encoder ? encoder->crtc : NULL; + int old_dpms; + + if (mode == connector->dpms) + return; + + old_dpms = connector->dpms; + connector->dpms = mode; + + /* from off to on, do crtc then encoder */ + if (mode < old_dpms) { + if (crtc) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + if (crtc_funcs->dpms) + (*crtc_funcs->dpms) (crtc, + drm_helper_choose_crtc_dpms(crtc)); + } + if (encoder) { + struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; + if (encoder_funcs->dpms) + (*encoder_funcs->dpms) (encoder, + drm_helper_choose_encoder_dpms(encoder)); + } + } + + /* from on to off, do encoder then crtc */ + if (mode > old_dpms) { + if (encoder) { + struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; + if (encoder_funcs->dpms) + (*encoder_funcs->dpms) (encoder, + drm_helper_choose_encoder_dpms(encoder)); + } + if (crtc) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + if (crtc_funcs->dpms) + (*crtc_funcs->dpms) (crtc, + drm_helper_choose_crtc_dpms(crtc)); + } + } + + return; +} +EXPORT_SYMBOL(drm_helper_connector_dpms); + /** * drm_hotplug_stage_two * @dev DRM device diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index f01def16a669..019b7c578236 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -481,7 +481,7 @@ int drm_ioctl(struct inode *inode, struct file *filp, } retcode = func(dev, kdata, file_priv); - if ((retcode == 0) && (cmd & IOC_OUT)) { + if (cmd & IOC_OUT) { if (copy_to_user((void __user *)arg, kdata, _IOC_SIZE(cmd)) != 0) retcode = -EFAULT; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ca9c61656714..6f6b26479d82 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -289,6 +289,11 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, struct drm_display_mode *mode; struct detailed_pixel_timing *pt = &timing->data.pixel_data; + /* ignore tiny modes */ + if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 || + ((pt->vactive_hi << 8) | pt->hactive_lo) < 64) + return NULL; + if (pt->stereo) { printk(KERN_WARNING "stereo mode not supported\n"); return NULL; diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 93e677a481f5..fc8e5acd9d9a 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -196,6 +196,7 @@ int drm_irq_install(struct drm_device *dev) { int ret = 0; unsigned long sh_flags = 0; + char *irqname; if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; @@ -227,8 +228,13 @@ int drm_irq_install(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED)) sh_flags = IRQF_SHARED; + if (dev->devname) + irqname = dev->devname; + else + irqname = dev->driver->name; + ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler, - sh_flags, dev->devname, dev); + sh_flags, irqname, dev); if (ret < 0) { mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 8f9372921f82..9987ab880835 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -147,7 +147,7 @@ static ssize_t status_show(struct device *device, enum drm_connector_status status; status = connector->funcs->detect(connector); - return snprintf(buf, PAGE_SIZE, "%s", + return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } @@ -166,7 +166,7 @@ static ssize_t dpms_show(struct device *device, if (ret) return 0; - return snprintf(buf, PAGE_SIZE, "%s", + return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_dpms_name((int)dpms_status)); } @@ -176,7 +176,7 @@ static ssize_t enabled_show(struct device *device, { struct drm_connector *connector = to_drm_connector(device); - return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" : + return snprintf(buf, PAGE_SIZE, "%s\n", connector->encoder ? "enabled" : "disabled"); } @@ -317,6 +317,7 @@ static struct device_attribute connector_attrs_opt1[] = { static struct bin_attribute edid_attr = { .attr.name = "edid", + .attr.mode = 0444, .size = 128, .read = edid_show, }; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 53d544552625..0ccb63ee50ee 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -987,12 +987,6 @@ static int i915_load_modeset_init(struct drm_device *dev) int fb_bar = IS_I9XX(dev) ? 2 : 0; int ret = 0; - dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL); - if (!dev->devname) { - ret = -ENOMEM; - goto out; - } - dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) & 0xff000000; @@ -1006,7 +1000,7 @@ static int i915_load_modeset_init(struct drm_device *dev) ret = i915_probe_agp(dev, &agp_size, &prealloc_size); if (ret) - goto kfree_devname; + goto out; /* Basic memrange allocator for stolen space (aka vram) */ drm_mm_init(&dev_priv->vram, 0, prealloc_size); @@ -1024,7 +1018,7 @@ static int i915_load_modeset_init(struct drm_device *dev) ret = i915_gem_init_ringbuffer(dev); if (ret) - goto kfree_devname; + goto out; /* Allow hardware batchbuffers unless told otherwise. */ @@ -1056,8 +1050,6 @@ static int i915_load_modeset_init(struct drm_device *dev) destroy_ringbuffer: i915_gem_cleanup_ringbuffer(dev); -kfree_devname: - kfree(dev->devname); out: return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9b149fe824c3..c431fa54bbb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -180,7 +180,8 @@ typedef struct drm_i915_private { int backlight_duty_cycle; /* restore backlight to this value */ bool panel_wants_dither; struct drm_display_mode *panel_fixed_mode; - struct drm_display_mode *vbt_mode; /* if any */ + struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */ + struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */ /* Feature bits from the VBIOS */ unsigned int int_tv_support:1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b189b49c7602..39f5c658ef5e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -349,7 +349,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -429,7 +429,7 @@ fail_put_user_pages: SetPageDirty(user_pages[i]); page_cache_release(user_pages[i]); } - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -649,7 +649,7 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -719,7 +719,7 @@ out_unlock: out_unpin_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -824,7 +824,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; num_pages = last_data_page - first_data_page + 1; - user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); + user_pages = drm_calloc_large(num_pages, sizeof(struct page *)); if (user_pages == NULL) return -ENOMEM; @@ -902,7 +902,7 @@ fail_unlock: fail_put_user_pages: for (i = 0; i < pinned_pages; i++) page_cache_release(user_pages[i]); - kfree(user_pages); + drm_free_large(user_pages); return ret; } @@ -1145,7 +1145,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return VM_FAULT_SIGBUS; + } + + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } /* Need a new fence register? */ @@ -1375,7 +1382,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); return ret; } - list_add(&obj_priv->list, &dev_priv->mm.inactive_list); + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } drm_gem_object_unreference(obj); @@ -1408,9 +1415,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) } obj_priv->dirty = 0; - drm_free(obj_priv->pages, - page_count * sizeof(struct page *), - DRM_MEM_DRIVER); + drm_free_large(obj_priv->pages); obj_priv->pages = NULL; } @@ -2024,8 +2029,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) */ page_count = obj->size / PAGE_SIZE; BUG_ON(obj_priv->pages != NULL); - obj_priv->pages = drm_calloc(page_count, sizeof(struct page *), - DRM_MEM_DRIVER); + obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { DRM_ERROR("Faled to allocate page list\n"); obj_priv->pages_refcount--; @@ -2131,8 +2135,10 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) return; } - pitch_val = (obj_priv->stride / 128) - 1; - WARN_ON(pitch_val & ~0x0000000f); + pitch_val = obj_priv->stride / 128; + pitch_val = ffs(pitch_val) - 1; + WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL); + val = obj_priv->gtt_offset; if (obj_priv->tiling_mode == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; @@ -2254,9 +2260,6 @@ try_again: goto try_again; } - BUG_ON(old_obj_priv->active || - (reg->obj->write_domain & I915_GEM_GPU_DOMAINS)); - /* * Zap this virtual mapping so we can set up a fence again * for this object next time we need it. @@ -2424,6 +2427,16 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; + /* XXX: The 865 in particular appears to be weird in how it handles + * cache flushing. We haven't figured it out, but the + * clflush+agp_chipset_flush doesn't appear to successfully get the + * data visible to the PGU, while wbinvd + agp_chipset_flush does. + */ + if (IS_I865G(obj->dev)) { + wbinvd(); + return; + } + drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -3111,7 +3124,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, reloc_count += exec_list[i].relocation_count; } - *relocs = drm_calloc(reloc_count, sizeof(**relocs), DRM_MEM_DRIVER); + *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); if (*relocs == NULL) return -ENOMEM; @@ -3125,8 +3138,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, exec_list[i].relocation_count * sizeof(**relocs)); if (ret != 0) { - drm_free(*relocs, reloc_count * sizeof(**relocs), - DRM_MEM_DRIVER); + drm_free_large(*relocs); *relocs = NULL; return -EFAULT; } @@ -3165,7 +3177,7 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, } err: - drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); + drm_free_large(relocs); return ret; } @@ -3198,10 +3210,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } /* Copy in the exec list from userland */ - exec_list = drm_calloc(sizeof(*exec_list), args->buffer_count, - DRM_MEM_DRIVER); - object_list = drm_calloc(sizeof(*object_list), args->buffer_count, - DRM_MEM_DRIVER); + exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count); + object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count); if (exec_list == NULL || object_list == NULL) { DRM_ERROR("Failed to allocate exec or object list " "for %d buffers\n", @@ -3462,10 +3472,8 @@ err: } pre_mutex_err: - drm_free(object_list, sizeof(*object_list) * args->buffer_count, - DRM_MEM_DRIVER); - drm_free(exec_list, sizeof(*exec_list) * args->buffer_count, - DRM_MEM_DRIVER); + drm_free_large(object_list); + drm_free_large(exec_list); drm_free(cliprects, sizeof(*cliprects) * args->num_cliprects, DRM_MEM_DRIVER); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 52a059354e83..540dd336e6ec 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -213,7 +213,8 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (tiling_mode == I915_TILING_NONE) return true; - if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + if (!IS_I9XX(dev) || + (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; else tile_width = 512; @@ -225,11 +226,18 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) return false; } else if (IS_I9XX(dev)) { - if (stride / tile_width > I830_FENCE_MAX_PITCH_VAL || + uint32_t pitch_val = ffs(stride / tile_width) - 1; + + /* XXX: For Y tiling, FENCE_MAX_PITCH_VAL is actually 6 (8KB) + * instead of 4 (2KB) on 945s. + */ + if (pitch_val > I915_FENCE_MAX_PITCH_VAL || size > (I830_FENCE_MAX_SIZE_VAL << 20)) return false; } else { - if (stride / 128 > I830_FENCE_MAX_PITCH_VAL || + uint32_t pitch_val = ffs(stride / tile_width) - 1; + + if (pitch_val > I830_FENCE_MAX_PITCH_VAL || size > (I830_FENCE_MAX_SIZE_VAL << 19)) return false; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 15da44cf21b1..375569d01d01 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -190,7 +190,8 @@ #define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8) #define I830_FENCE_PITCH_SHIFT 4 #define I830_FENCE_REG_VALID (1<<0) -#define I830_FENCE_MAX_PITCH_VAL 0x10 +#define I915_FENCE_MAX_PITCH_VAL 0x10 +#define I830_FENCE_MAX_PITCH_VAL 6 #define I830_FENCE_MAX_SIZE_VAL (1<<8) #define I915_FENCE_START_MASK 0x0ff00000 @@ -1410,9 +1411,25 @@ /* Cursor A & B regs */ #define CURACNTR 0x70080 +/* Old style CUR*CNTR flags (desktop 8xx) */ +#define CURSOR_ENABLE 0x80000000 +#define CURSOR_GAMMA_ENABLE 0x40000000 +#define CURSOR_STRIDE_MASK 0x30000000 +#define CURSOR_FORMAT_SHIFT 24 +#define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_2C (0x00 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_3C (0x01 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_4C (0x02 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_ARGB (0x04 << CURSOR_FORMAT_SHIFT) +#define CURSOR_FORMAT_XRGB (0x05 << CURSOR_FORMAT_SHIFT) +/* New style CUR*CNTR flags */ +#define CURSOR_MODE 0x27 #define CURSOR_MODE_DISABLE 0x00 #define CURSOR_MODE_64_32B_AX 0x07 #define CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX) +#define MCURSOR_PIPE_SELECT (1 << 28) +#define MCURSOR_PIPE_A 0x00 +#define MCURSOR_PIPE_B (1 << 28) #define MCURSOR_GAMMA_ENABLE (1 << 26) #define CURABASE 0x70084 #define CURAPOS 0x70088 @@ -1420,6 +1437,7 @@ #define CURSOR_POS_SIGN 0x8000 #define CURSOR_X_SHIFT 0 #define CURSOR_Y_SHIFT 16 +#define CURSIZE 0x700a0 #define CURBCNTR 0x700c0 #define CURBBASE 0x700c4 #define CURBPOS 0x700c8 diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index fc28e2bbd542..9d78cff33b24 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -57,9 +57,43 @@ find_section(struct bdb_header *bdb, int section_id) return NULL; } -/* Try to find panel data */ static void -parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode, + struct lvds_dvo_timing *dvo_timing) +{ + panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | + dvo_timing->hactive_lo; + panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay + + ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo); + panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start + + dvo_timing->hsync_pulse_width; + panel_fixed_mode->htotal = panel_fixed_mode->hdisplay + + ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo); + + panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) | + dvo_timing->vactive_lo; + panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay + + dvo_timing->vsync_off; + panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start + + dvo_timing->vsync_pulse_width; + panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay + + ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo); + panel_fixed_mode->clock = dvo_timing->clock * 10; + panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; + + /* Some VBTs have bogus h/vtotal values */ + if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) + panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; + if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) + panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1; + + drm_mode_set_name(panel_fixed_mode); +} + +/* Try to find integrated panel data */ +static void +parse_lfp_panel_data(struct drm_i915_private *dev_priv, + struct bdb_header *bdb) { struct bdb_lvds_options *lvds_options; struct bdb_lvds_lfp_data *lvds_lfp_data; @@ -91,38 +125,45 @@ parse_panel_data(struct drm_i915_private *dev_priv, struct bdb_header *bdb) panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode), DRM_MEM_DRIVER); - panel_fixed_mode->hdisplay = (dvo_timing->hactive_hi << 8) | - dvo_timing->hactive_lo; - panel_fixed_mode->hsync_start = panel_fixed_mode->hdisplay + - ((dvo_timing->hsync_off_hi << 8) | dvo_timing->hsync_off_lo); - panel_fixed_mode->hsync_end = panel_fixed_mode->hsync_start + - dvo_timing->hsync_pulse_width; - panel_fixed_mode->htotal = panel_fixed_mode->hdisplay + - ((dvo_timing->hblank_hi << 8) | dvo_timing->hblank_lo); + fill_detail_timing_data(panel_fixed_mode, dvo_timing); - panel_fixed_mode->vdisplay = (dvo_timing->vactive_hi << 8) | - dvo_timing->vactive_lo; - panel_fixed_mode->vsync_start = panel_fixed_mode->vdisplay + - dvo_timing->vsync_off; - panel_fixed_mode->vsync_end = panel_fixed_mode->vsync_start + - dvo_timing->vsync_pulse_width; - panel_fixed_mode->vtotal = panel_fixed_mode->vdisplay + - ((dvo_timing->vblank_hi << 8) | dvo_timing->vblank_lo); - panel_fixed_mode->clock = dvo_timing->clock * 10; - panel_fixed_mode->type = DRM_MODE_TYPE_PREFERRED; + dev_priv->lfp_lvds_vbt_mode = panel_fixed_mode; - /* Some VBTs have bogus h/vtotal values */ - if (panel_fixed_mode->hsync_end > panel_fixed_mode->htotal) - panel_fixed_mode->htotal = panel_fixed_mode->hsync_end + 1; - if (panel_fixed_mode->vsync_end > panel_fixed_mode->vtotal) - panel_fixed_mode->vtotal = panel_fixed_mode->vsync_end + 1; + DRM_DEBUG("Found panel mode in BIOS VBT tables:\n"); + drm_mode_debug_printmodeline(panel_fixed_mode); - drm_mode_set_name(panel_fixed_mode); + return; +} + +/* Try to find sdvo panel data */ +static void +parse_sdvo_panel_data(struct drm_i915_private *dev_priv, + struct bdb_header *bdb) +{ + struct bdb_sdvo_lvds_options *sdvo_lvds_options; + struct lvds_dvo_timing *dvo_timing; + struct drm_display_mode *panel_fixed_mode; - dev_priv->vbt_mode = panel_fixed_mode; + dev_priv->sdvo_lvds_vbt_mode = NULL; - DRM_DEBUG("Found panel mode in BIOS VBT tables:\n"); - drm_mode_debug_printmodeline(panel_fixed_mode); + sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS); + if (!sdvo_lvds_options) + return; + + dvo_timing = find_section(bdb, BDB_SDVO_PANEL_DTDS); + if (!dvo_timing) + return; + + panel_fixed_mode = drm_calloc(1, sizeof(*panel_fixed_mode), + DRM_MEM_DRIVER); + + if (!panel_fixed_mode) + return; + + fill_detail_timing_data(panel_fixed_mode, + dvo_timing + sdvo_lvds_options->panel_type); + + dev_priv->sdvo_lvds_vbt_mode = panel_fixed_mode; return; } @@ -199,7 +240,8 @@ intel_init_bios(struct drm_device *dev) /* Grab useful general definitions */ parse_general_features(dev_priv, bdb); - parse_panel_data(dev_priv, bdb); + parse_lfp_panel_data(dev_priv, bdb); + parse_sdvo_panel_data(dev_priv, bdb); pci_unmap_rom(pdev, bios); diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index de621aad85b5..8ca2cde15804 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -279,6 +279,23 @@ struct vch_bdb_22 { struct vch_panel_data panels[16]; } __attribute__((packed)); +struct bdb_sdvo_lvds_options { + u8 panel_backlight; + u8 h40_set_panel_type; + u8 panel_type; + u8 ssc_clk_freq; + u16 als_low_trip; + u16 als_high_trip; + u8 sclalarcoeff_tab_row_num; + u8 sclalarcoeff_tab_row_size; + u8 coefficient[8]; + u8 panel_misc_bits_1; + u8 panel_misc_bits_2; + u8 panel_misc_bits_3; + u8 panel_misc_bits_4; +} __attribute__((packed)); + + bool intel_init_bios(struct drm_device *dev); /* diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 19148c3df637..79acc4f4c1f8 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -198,9 +198,142 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) return intel_ddc_probe(intel_output); } +static enum drm_connector_status +intel_crt_load_detect(struct drm_crtc *crtc, struct intel_output *intel_output) +{ + struct drm_encoder *encoder = &intel_output->enc; + struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + uint32_t pipe = intel_crtc->pipe; + uint32_t save_bclrpat; + uint32_t save_vtotal; + uint32_t vtotal, vactive; + uint32_t vsample; + uint32_t vblank, vblank_start, vblank_end; + uint32_t dsl; + uint32_t bclrpat_reg; + uint32_t vtotal_reg; + uint32_t vblank_reg; + uint32_t vsync_reg; + uint32_t pipeconf_reg; + uint32_t pipe_dsl_reg; + uint8_t st00; + enum drm_connector_status status; + + if (pipe == 0) { + bclrpat_reg = BCLRPAT_A; + vtotal_reg = VTOTAL_A; + vblank_reg = VBLANK_A; + vsync_reg = VSYNC_A; + pipeconf_reg = PIPEACONF; + pipe_dsl_reg = PIPEADSL; + } else { + bclrpat_reg = BCLRPAT_B; + vtotal_reg = VTOTAL_B; + vblank_reg = VBLANK_B; + vsync_reg = VSYNC_B; + pipeconf_reg = PIPEBCONF; + pipe_dsl_reg = PIPEBDSL; + } + + save_bclrpat = I915_READ(bclrpat_reg); + save_vtotal = I915_READ(vtotal_reg); + vblank = I915_READ(vblank_reg); + + vtotal = ((save_vtotal >> 16) & 0xfff) + 1; + vactive = (save_vtotal & 0x7ff) + 1; + + vblank_start = (vblank & 0xfff) + 1; + vblank_end = ((vblank >> 16) & 0xfff) + 1; + + /* Set the border color to purple. */ + I915_WRITE(bclrpat_reg, 0x500050); + + if (IS_I9XX(dev)) { + uint32_t pipeconf = I915_READ(pipeconf_reg); + I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER); + /* Wait for next Vblank to substitue + * border color for Color info */ + intel_wait_for_vblank(dev); + st00 = I915_READ8(VGA_MSR_WRITE); + status = ((st00 & (1 << 4)) != 0) ? + connector_status_connected : + connector_status_disconnected; + + I915_WRITE(pipeconf_reg, pipeconf); + } else { + bool restore_vblank = false; + int count, detect; + + /* + * If there isn't any border, add some. + * Yes, this will flicker + */ + if (vblank_start <= vactive && vblank_end >= vtotal) { + uint32_t vsync = I915_READ(vsync_reg); + uint32_t vsync_start = (vsync & 0xffff) + 1; + + vblank_start = vsync_start; + I915_WRITE(vblank_reg, + (vblank_start - 1) | + ((vblank_end - 1) << 16)); + restore_vblank = true; + } + /* sample in the vertical border, selecting the larger one */ + if (vblank_start - vactive >= vtotal - vblank_end) + vsample = (vblank_start + vactive) >> 1; + else + vsample = (vtotal + vblank_end) >> 1; + + /* + * Wait for the border to be displayed + */ + while (I915_READ(pipe_dsl_reg) >= vactive) + ; + while ((dsl = I915_READ(pipe_dsl_reg)) <= vsample) + ; + /* + * Watch ST00 for an entire scanline + */ + detect = 0; + count = 0; + do { + count++; + /* Read the ST00 VGA status register */ + st00 = I915_READ8(VGA_MSR_WRITE); + if (st00 & (1 << 4)) + detect++; + } while ((I915_READ(pipe_dsl_reg) == dsl)); + + /* restore vblank if necessary */ + if (restore_vblank) + I915_WRITE(vblank_reg, vblank); + /* + * If more than 3/4 of the scanline detected a monitor, + * then it is assumed to be present. This works even on i830, + * where there isn't any way to force the border color across + * the screen + */ + status = detect * 4 > count * 3 ? + connector_status_connected : + connector_status_disconnected; + } + + /* Restore previous settings */ + I915_WRITE(bclrpat_reg, save_bclrpat); + + return status; +} + static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) { struct drm_device *dev = connector->dev; + struct intel_output *intel_output = to_intel_output(connector); + struct drm_encoder *encoder = &intel_output->enc; + struct drm_crtc *crtc; + int dpms_mode; + enum drm_connector_status status; if (IS_I9XX(dev) && !IS_I915G(dev) && !IS_I915GM(dev)) { if (intel_crt_detect_hotplug(connector)) @@ -212,8 +345,20 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(connector)) return connector_status_connected; - /* TODO use load detect */ - return connector_status_unknown; + /* for pre-945g platforms use load detect */ + if (encoder->crtc && encoder->crtc->enabled) { + status = intel_crt_load_detect(encoder->crtc, intel_output); + } else { + crtc = intel_get_load_detect_pipe(intel_output, + NULL, &dpms_mode); + if (crtc) { + status = intel_crt_load_detect(crtc, intel_output); + intel_release_load_detect_pipe(intel_output, dpms_mode); + } else + status = connector_status_unknown; + } + + return status; } static void intel_crt_destroy(struct drm_connector *connector) @@ -236,11 +381,6 @@ static int intel_crt_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct drm_device *dev = connector->dev; - - if (property == dev->mode_config.dpms_property && connector->encoder) - intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf)); - return 0; } @@ -257,6 +397,7 @@ static const struct drm_encoder_helper_funcs intel_crt_helper_funcs = { }; static const struct drm_connector_funcs intel_crt_connector_funcs = { + .dpms = drm_helper_connector_dpms, .detect = intel_crt_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = intel_crt_destroy, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3387cf32f385..c9d6f10ba92e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1357,7 +1357,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; - uint32_t temp; + uint32_t temp = I915_READ(control); size_t addr; int ret; @@ -1366,7 +1366,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, /* if we want to turn off the cursor ignore width and height */ if (!handle) { DRM_DEBUG("cursor off\n"); - temp = CURSOR_MODE_DISABLE; + if (IS_MOBILE(dev) || IS_I9XX(dev)) { + temp &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE); + temp |= CURSOR_MODE_DISABLE; + } else { + temp &= ~(CURSOR_ENABLE | CURSOR_GAMMA_ENABLE); + } addr = 0; bo = NULL; mutex_lock(&dev->struct_mutex); @@ -1409,10 +1414,19 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, addr = obj_priv->phys_obj->handle->busaddr; } - temp = 0; - /* set the pipe for the cursor */ - temp |= (pipe << 28); - temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE; + if (!IS_I9XX(dev)) + I915_WRITE(CURSIZE, (height << 12) | width); + + /* Hooray for CUR*CNTR differences */ + if (IS_MOBILE(dev) || IS_I9XX(dev)) { + temp &= ~(CURSOR_MODE | MCURSOR_PIPE_SELECT); + temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE; + temp |= (pipe << 28); /* Connect to correct pipe */ + } else { + temp &= ~(CURSOR_FORMAT_MASK); + temp |= CURSOR_ENABLE; + temp |= CURSOR_FORMAT_ARGB | CURSOR_GAMMA_ENABLE; + } finish: I915_WRITE(control, temp); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 8b8d6e65cd3f..1ee3007d6ec0 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -316,6 +316,7 @@ static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = { }; static const struct drm_connector_funcs intel_dvo_connector_funcs = { + .dpms = drm_helper_connector_dpms, .save = intel_dvo_save, .restore = intel_dvo_restore, .detect = intel_dvo_detect, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index d0983bb93a18..7d6bdd705326 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -219,6 +219,7 @@ static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = { }; static const struct drm_connector_funcs intel_hdmi_connector_funcs = { + .dpms = drm_helper_connector_dpms, .save = intel_hdmi_save, .restore = intel_hdmi_restore, .detect = intel_hdmi_detect, diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 439a86514993..53cccfa58b95 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -343,11 +343,6 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, uint64_t value) { - struct drm_device *dev = connector->dev; - - if (property == dev->mode_config.dpms_property && connector->encoder) - intel_lvds_dpms(connector->encoder, (uint32_t)(value & 0xf)); - return 0; } @@ -366,6 +361,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs }; static const struct drm_connector_funcs intel_lvds_connector_funcs = { + .dpms = drm_helper_connector_dpms, .save = intel_lvds_save, .restore = intel_lvds_restore, .detect = intel_lvds_detect, @@ -391,7 +387,7 @@ static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id) } /* These systems claim to have LVDS, but really don't */ -static const struct dmi_system_id __initdata intel_no_lvds[] = { +static const struct dmi_system_id intel_no_lvds[] = { { .callback = intel_no_lvds_dmi_callback, .ident = "Apple Mac Mini (Core series)", @@ -511,10 +507,10 @@ void intel_lvds_init(struct drm_device *dev) } /* Failed to get EDID, what about VBT? */ - if (dev_priv->vbt_mode) { + if (dev_priv->lfp_lvds_vbt_mode) { mutex_lock(&dev->mode_config.mutex); dev_priv->panel_fixed_mode = - drm_mode_duplicate(dev, dev_priv->vbt_mode); + drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode); mutex_unlock(&dev->mode_config.mutex); if (dev_priv->panel_fixed_mode) { dev_priv->panel_fixed_mode->type |= diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 9913651c1e17..3093b4d4a4dd 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -69,6 +69,10 @@ struct intel_sdvo_priv { * This is set if we treat the device as HDMI, instead of DVI. */ bool is_hdmi; + /** + * This is set if we detect output of sdvo device as LVDS. + */ + bool is_lvds; /** * Returned SDTV resolutions allowed for the current format, if the @@ -1398,10 +1402,8 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect static void intel_sdvo_get_ddc_modes(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); - struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; /* set the bus switch and get the modes */ - intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); intel_ddc_get_modes(intel_output); #if 0 @@ -1543,6 +1545,37 @@ static void intel_sdvo_get_tv_modes(struct drm_connector *connector) } } +static void intel_sdvo_get_lvds_modes(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct drm_i915_private *dev_priv = connector->dev->dev_private; + + /* + * Attempt to get the mode list from DDC. + * Assume that the preferred modes are + * arranged in priority order. + */ + /* set the bus switch and get the modes */ + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + intel_ddc_get_modes(intel_output); + if (list_empty(&connector->probed_modes) == false) + return; + + /* Fetch modes from VBT */ + if (dev_priv->sdvo_lvds_vbt_mode != NULL) { + struct drm_display_mode *newmode; + newmode = drm_mode_duplicate(connector->dev, + dev_priv->sdvo_lvds_vbt_mode); + if (newmode != NULL) { + /* Guarantee the mode is preferred */ + newmode->type = (DRM_MODE_TYPE_PREFERRED | + DRM_MODE_TYPE_DRIVER); + drm_mode_probed_add(connector, newmode); + } + } +} + static int intel_sdvo_get_modes(struct drm_connector *connector) { struct intel_output *output = to_intel_output(connector); @@ -1550,6 +1583,8 @@ static int intel_sdvo_get_modes(struct drm_connector *connector) if (sdvo_priv->is_tv) intel_sdvo_get_tv_modes(connector); + else if (sdvo_priv->is_lvds == true) + intel_sdvo_get_lvds_modes(connector); else intel_sdvo_get_ddc_modes(connector); @@ -1564,6 +1599,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector) if (intel_output->i2c_bus) intel_i2c_destroy(intel_output->i2c_bus); + if (intel_output->ddc_bus) + intel_i2c_destroy(intel_output->ddc_bus); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(intel_output); @@ -1578,6 +1616,7 @@ static const struct drm_encoder_helper_funcs intel_sdvo_helper_funcs = { }; static const struct drm_connector_funcs intel_sdvo_connector_funcs = { + .dpms = drm_helper_connector_dpms, .save = intel_sdvo_save, .restore = intel_sdvo_restore, .detect = intel_sdvo_detect, @@ -1660,12 +1699,56 @@ intel_sdvo_get_digital_encoding_mode(struct intel_output *output) return true; } +static struct intel_output * +intel_sdvo_chan_to_intel_output(struct intel_i2c_chan *chan) +{ + struct drm_device *dev = chan->drm_dev; + struct drm_connector *connector; + struct intel_output *intel_output = NULL; + + list_for_each_entry(connector, + &dev->mode_config.connector_list, head) { + if (to_intel_output(connector)->ddc_bus == chan) { + intel_output = to_intel_output(connector); + break; + } + } + return intel_output; +} + +static int intel_sdvo_master_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg msgs[], int num) +{ + struct intel_output *intel_output; + struct intel_sdvo_priv *sdvo_priv; + struct i2c_algo_bit_data *algo_data; + struct i2c_algorithm *algo; + + algo_data = (struct i2c_algo_bit_data *)i2c_adap->algo_data; + intel_output = + intel_sdvo_chan_to_intel_output( + (struct intel_i2c_chan *)(algo_data->data)); + if (intel_output == NULL) + return -EINVAL; + + sdvo_priv = intel_output->dev_priv; + algo = (struct i2c_algorithm *)intel_output->i2c_bus->adapter.algo; + + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + return algo->master_xfer(i2c_adap, msgs, num); +} + +static struct i2c_algorithm intel_sdvo_i2c_bit_algo = { + .master_xfer = intel_sdvo_master_xfer, +}; + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; struct intel_output *intel_output; struct intel_sdvo_priv *sdvo_priv; struct intel_i2c_chan *i2cbus = NULL; + struct intel_i2c_chan *ddcbus = NULL; int connector_type; u8 ch[0x40]; int i; @@ -1676,17 +1759,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) return false; } - connector = &intel_output->base; - - drm_connector_init(dev, connector, &intel_sdvo_connector_funcs, - DRM_MODE_CONNECTOR_Unknown); - drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs); sdvo_priv = (struct intel_sdvo_priv *)(intel_output + 1); intel_output->type = INTEL_OUTPUT_SDVO; - connector->interlace_allowed = 0; - connector->doublescan_allowed = 0; - /* setup the DDC bus. */ if (output_device == SDVOB) i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOB"); @@ -1694,7 +1769,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) i2cbus = intel_i2c_create(dev, GPIOE, "SDVOCTRL_E for SDVOC"); if (!i2cbus) - goto err_connector; + goto err_inteloutput; sdvo_priv->i2c_bus = i2cbus; @@ -1710,7 +1785,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) intel_output->i2c_bus = i2cbus; intel_output->dev_priv = sdvo_priv; - /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { @@ -1720,6 +1794,22 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) } } + /* setup the DDC bus. */ + if (output_device == SDVOB) + ddcbus = intel_i2c_create(dev, GPIOE, "SDVOB DDC BUS"); + else + ddcbus = intel_i2c_create(dev, GPIOE, "SDVOC DDC BUS"); + + if (ddcbus == NULL) + goto err_i2c; + + intel_sdvo_i2c_bit_algo.functionality = + intel_output->i2c_bus->adapter.algo->functionality; + ddcbus->adapter.algo = &intel_sdvo_i2c_bit_algo; + intel_output->ddc_bus = ddcbus; + + /* In defaut case sdvo lvds is false */ + sdvo_priv->is_lvds = false; intel_sdvo_get_capabilities(intel_output, &sdvo_priv->caps); if (sdvo_priv->caps.output_flags & @@ -1729,7 +1819,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else sdvo_priv->controlled_output = SDVO_OUTPUT_TMDS1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_TMDS; connector_type = DRM_MODE_CONNECTOR_DVID; @@ -1747,7 +1836,6 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_SVID0) { sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_TVDAC; connector_type = DRM_MODE_CONNECTOR_SVIDEO; sdvo_priv->is_tv = true; @@ -1756,30 +1844,28 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB0) { sdvo_priv->controlled_output = SDVO_OUTPUT_RGB0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_RGB1) { sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_DAC; connector_type = DRM_MODE_CONNECTOR_VGA; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS0) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; } else if (sdvo_priv->caps.output_flags & SDVO_OUTPUT_LVDS1) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS1; - connector->display_info.subpixel_order = SubPixelHorizontalRGB; encoder_type = DRM_MODE_ENCODER_LVDS; connector_type = DRM_MODE_CONNECTOR_LVDS; + sdvo_priv->is_lvds = true; } else { @@ -1795,9 +1881,16 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) goto err_i2c; } + connector = &intel_output->base; + drm_connector_init(dev, connector, &intel_sdvo_connector_funcs, + connector_type); + drm_connector_helper_add(connector, &intel_sdvo_connector_helper_funcs); + connector->interlace_allowed = 0; + connector->doublescan_allowed = 0; + connector->display_info.subpixel_order = SubPixelHorizontalRGB; + drm_encoder_init(dev, &intel_output->enc, &intel_sdvo_enc_funcs, encoder_type); drm_encoder_helper_add(&intel_output->enc, &intel_sdvo_helper_funcs); - connector->connector_type = connector_type; drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); drm_sysfs_connector_add(connector); @@ -1829,14 +1922,13 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->caps.output_flags & (SDVO_OUTPUT_TMDS1 | SDVO_OUTPUT_RGB1) ? 'Y' : 'N'); - intel_output->ddc_bus = i2cbus; - return true; err_i2c: + if (ddcbus != NULL) + intel_i2c_destroy(intel_output->ddc_bus); intel_i2c_destroy(intel_output->i2c_bus); -err_connector: - drm_connector_cleanup(connector); +err_inteloutput: kfree(intel_output); return false; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index d2c32983242d..98ac0546b7bd 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1626,6 +1626,7 @@ static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = { }; static const struct drm_connector_funcs intel_tv_connector_funcs = { + .dpms = drm_helper_connector_dpms, .save = intel_tv_save, .restore = intel_tv_restore, .detect = intel_tv_detect, diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index 77a7a4d84650..aff90bb96488 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -2185,9 +2185,9 @@ void radeon_commit_ring(drm_radeon_private_t *dev_priv) /* check if the ring is padded out to 16-dword alignment */ - tail_aligned = dev_priv->ring.tail & 0xf; + tail_aligned = dev_priv->ring.tail & (RADEON_RING_ALIGN-1); if (tail_aligned) { - int num_p2 = 16 - tail_aligned; + int num_p2 = RADEON_RING_ALIGN - tail_aligned; ring = dev_priv->ring.start; /* pad with some CP_PACKET2 */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 8071d965f142..0c6bfc1de153 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -1964,11 +1964,14 @@ do { \ #define RING_LOCALS int write, _nr, _align_nr; unsigned int mask; u32 *ring; +#define RADEON_RING_ALIGN 16 + #define BEGIN_RING( n ) do { \ if ( RADEON_VERBOSE ) { \ DRM_INFO( "BEGIN_RING( %d )\n", (n)); \ } \ - _align_nr = (n + 0xf) & ~0xf; \ + _align_nr = RADEON_RING_ALIGN - ((dev_priv->ring.tail + n) & (RADEON_RING_ALIGN-1)); \ + _align_nr += n; \ if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) { \ COMMIT_RING(); \ radeon_wait_ring( dev_priv, _align_nr * sizeof(u32)); \ diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index b5e3b2851698..a1787fdf5b9f 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -182,7 +182,7 @@ static struct platform_driver lm78_isa_driver = { .name = "lm78", }, .probe = lm78_isa_probe, - .remove = lm78_isa_remove, + .remove = __devexit_p(lm78_isa_remove), }; diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 4e16ce68b063..36da913cc553 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -466,7 +466,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec) struct ide_host *host; unsigned int sel = 0; int ret; - hw_regs_t hw[2], *hws[] = { &hw[0], NULL, NULL, NULL }; + hw_regs_t hw[2], *hws[] = { &hw[0], &hw[1], NULL, NULL }; struct ide_port_info d = icside_v6_port_info; ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 35dc38d3b2c5..6415a2e2ba87 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -696,7 +696,7 @@ void ide_timer_expiry (unsigned long data) } spin_lock_irq(&hwif->lock); enable_irq(hwif->irq); - if (startstop == ide_stopped) { + if (startstop == ide_stopped && hwif->polling == 0) { ide_unlock_port(hwif); plug_device = 1; } @@ -868,7 +868,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) * same irq as is currently being serviced here, and Linux * won't allow another of the same (on any CPU) until we return. */ - if (startstop == ide_stopped) { + if (startstop == ide_stopped && hwif->polling == 0) { BUG_ON(hwif->handler); ide_unlock_port(hwif); plug_device = 1; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index c19a221b1e18..06fe002116ec 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -206,8 +206,6 @@ EXPORT_SYMBOL_GPL(ide_in_drive_list); /* * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid. - * We list them here and depend on the device side cable detection for them. - * * Some optical devices with the buggy firmwares have the same problem. */ static const struct drive_list_entry ivb_list[] = { @@ -251,10 +249,25 @@ u8 eighty_ninty_three(ide_drive_t *drive) * - force bit13 (80c cable present) check also for !ivb devices * (unless the slave device is pre-ATA3) */ - if ((id[ATA_ID_HW_CONFIG] & 0x4000) || - (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000))) + if (id[ATA_ID_HW_CONFIG] & 0x4000) return 1; + if (ivb) { + const char *model = (char *)&id[ATA_ID_PROD]; + + if (strstr(model, "TSSTcorp CDDVDW SH-S202")) { + /* + * These ATAPI devices always report 80c cable + * so we have to depend on the host in this case. + */ + if (hwif->cbl == ATA_CBL_PATA80) + return 1; + } else { + /* Depend on the device side cable detection. */ + if (id[ATA_ID_HW_CONFIG] & 0x2000) + return 1; + } + } no_80w: if (drive->dev_flags & IDE_DFLAG_UDMA33_WARNED) return 0; diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 56ff8c46c7d1..2148df836ce7 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -31,24 +31,6 @@ void ide_toggle_bounce(ide_drive_t *drive, int on) blk_queue_bounce_limit(drive->queue, addr); } -static void ide_dump_opcode(ide_drive_t *drive) -{ - struct request *rq = drive->hwif->rq; - struct ide_cmd *cmd = NULL; - - if (!rq) - return; - - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) - cmd = rq->special; - - printk(KERN_ERR "ide: failed opcode was: "); - if (cmd == NULL) - printk(KERN_CONT "unknown\n"); - else - printk(KERN_CONT "0x%02x\n", cmd->tf.command); -} - u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48) { struct ide_taskfile *tf = &cmd->tf; @@ -91,7 +73,7 @@ static void ide_dump_sector(ide_drive_t *drive) static void ide_dump_ata_error(ide_drive_t *drive, u8 err) { - printk(KERN_ERR "{ "); + printk(KERN_CONT "{ "); if (err & ATA_ABORTED) printk(KERN_CONT "DriveStatusError "); if (err & ATA_ICRC) @@ -121,7 +103,7 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err) static void ide_dump_atapi_error(ide_drive_t *drive, u8 err) { - printk(KERN_ERR "{ "); + printk(KERN_CONT "{ "); if (err & ATAPI_ILI) printk(KERN_CONT "IllegalLengthIndication "); if (err & ATAPI_EOM) @@ -179,7 +161,10 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat) else ide_dump_atapi_error(drive, err); } - ide_dump_opcode(drive); + + printk(KERN_ERR "%s: possibly failed opcode: 0x%02x\n", + drive->name, drive->hwif->cmd.tf.command); + return err; } EXPORT_SYMBOL(ide_dump_status); diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c index 61111fd27130..39d4e01f5c9c 100644 --- a/drivers/ide/ide-pci-generic.c +++ b/drivers/ide/ide-pci-generic.c @@ -33,6 +33,16 @@ static int ide_generic_all; /* Set to claim all devices */ module_param_named(all_generic_ide, ide_generic_all, bool, 0444); MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers."); +static void netcell_quirkproc(ide_drive_t *drive) +{ + /* mark words 85-87 as valid */ + drive->id[ATA_ID_CSF_DEFAULT] |= 0x4000; +} + +static const struct ide_port_ops netcell_port_ops = { + .quirkproc = netcell_quirkproc, +}; + #define DECLARE_GENERIC_PCI_DEV(extra_flags) \ { \ .name = DRV_NAME, \ @@ -74,6 +84,7 @@ static const struct ide_port_info generic_chipsets[] __devinitdata = { { /* 6: Revolution */ .name = DRV_NAME, + .port_ops = &netcell_port_ops, .host_flags = IDE_HFLAG_CLEAR_SIMPLEX | IDE_HFLAG_TRUST_BIOS_FOR_DMA | IDE_HFLAG_OFF_BOARD, diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 7f264ed1141b..c895ed52b2e8 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -295,7 +295,7 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(hwif, timeout, use_altstatus)) + if (ide_busy_sleep(drive, timeout, use_altstatus)) return 1; /* wait for IRQ and ATA_DRQ */ @@ -316,8 +316,9 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) return rc; } -int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) +int ide_busy_sleep(ide_drive_t *drive, unsigned long timeout, int altstatus) { + ide_hwif_t *hwif = drive->hwif; u8 stat; timeout += jiffies; @@ -330,6 +331,8 @@ int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) return 0; } while (time_before(jiffies, timeout)); + printk(KERN_ERR "%s: timeout in %s\n", drive->name, __func__); + return 1; /* drive timed-out */ } @@ -420,7 +423,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) tp_ops->dev_select(drive); msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); - (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); + (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); rc = ide_dev_read_id(drive, cmd, id); } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index cb942a9b580f..3a53e0834cf7 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -614,12 +614,6 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, { idetape_tape_t *tape = drive->driver_data; - if (drive->pc->c[0] == REQUEST_SENSE && - pc->c[0] == REQUEST_SENSE) { - printk(KERN_ERR "ide-tape: possible ide-tape.c bug - " - "Two request sense in serial were issued\n"); - } - if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE) drive->failed_pc = pc; diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 248a54bd2386..b3bc96f930a6 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -1,6 +1,6 @@ /* * Copyright (C) 1998-2002 Andre Hedrick <andre@linux-ide.org> - * Copyright (C) 2006-2007 MontaVista Software, Inc. + * Copyright (C) 2006-2007, 2009 MontaVista Software, Inc. * Copyright (C) 2007 Bartlomiej Zolnierkiewicz * * Portions Copyright (C) 1999 Promise Technology, Inc. @@ -227,28 +227,19 @@ somebody_else: return (dma_stat & 4) == 4; /* return 1 if INTR asserted */ } -static void pdc202xx_reset_host (ide_hwif_t *hwif) +static void pdc202xx_reset(ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; unsigned long high_16 = hwif->extra_base - 16; u8 udma_speed_flag = inb(high_16 | 0x001f); + printk(KERN_WARNING "PDC202xx: software reset...\n"); + outb(udma_speed_flag | 0x10, high_16 | 0x001f); mdelay(100); outb(udma_speed_flag & ~0x10, high_16 | 0x001f); mdelay(2000); /* 2 seconds ?! */ - printk(KERN_WARNING "PDC202XX: %s channel reset.\n", - hwif->channel ? "Secondary" : "Primary"); -} - -static void pdc202xx_reset (ide_drive_t *drive) -{ - ide_hwif_t *hwif = drive->hwif; - ide_hwif_t *mate = hwif->mate; - - pdc202xx_reset_host(hwif); - pdc202xx_reset_host(mate); - ide_set_max_pio(drive); } @@ -328,9 +319,8 @@ static const struct ide_dma_ops pdc20246_dma_ops = { .dma_start = ide_dma_start, .dma_end = ide_dma_end, .dma_test_irq = pdc202xx_dma_test_irq, - .dma_lost_irq = pdc202xx_dma_lost_irq, + .dma_lost_irq = ide_dma_lost_irq, .dma_timer_expiry = ide_dma_sff_timer_expiry, - .dma_clear = pdc202xx_reset, .dma_sff_read_status = ide_dma_sff_read_status, }; diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index 2aa699933064..69860dea3820 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -263,6 +263,7 @@ static const struct ich_laptop ich_laptop[] = { { 0x24CA, 0x1025, 0x003d }, /* ICH4 on ACER TM290 */ { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ { 0x2653, 0x1043, 0x82D8 }, /* ICH6M on Asus Eee 701 */ + { 0x27df, 0x104d, 0x900e }, /* ICH7 on Sony TZ-90 */ /* end marker */ { 0, } }; diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index 3ff7231e4858..028de26a25fe 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -67,6 +67,7 @@ static struct via_isa_bridge { u8 udma_mask; u8 flags; } via_isa_bridges[] = { + { "vx855", PCI_DEVICE_ID_VIA_VX855, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST }, @@ -474,6 +475,7 @@ static const struct pci_device_id via_pci_tbl[] = { { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 }, + { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), 0 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410), 1 }, { PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 }, { 0, }, diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index bf740394d704..949c97ff57e3 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -41,6 +41,10 @@ static int debug; module_param_named(debug, debug, uint, 0644); MODULE_PARM_DESC(debug, "Enable debug printks in this driver"); +static int forceload; +module_param_named(forceload, forceload, uint, 0644); +MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000"); + #define dprintk(fmt, arg...) \ do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0) @@ -552,7 +556,7 @@ static int __init i7300_idle_init(void) cpus_clear(idle_cpumask); total_us = 0; - if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev)) + if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload)) return -ENODEV; if (i7300_idle_thrt_save()) diff --git a/drivers/input/input.c b/drivers/input/input.c index e54e002665b0..5d445f48789b 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -42,6 +42,7 @@ static unsigned int input_abs_bypass_init_data[] __initdata = { ABS_MT_POSITION_Y, ABS_MT_TOOL_TYPE, ABS_MT_BLOB_ID, + ABS_MT_TRACKING_ID, 0 }; static unsigned long input_abs_bypass[BITS_TO_LONGS(ABS_CNT)]; diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index e29cdc13a199..a28c06d686e1 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -107,7 +107,7 @@ static void amba_kmi_close(struct serio *io) clk_disable(kmi->clk); } -static int amba_kmi_probe(struct amba_device *dev, void *id) +static int amba_kmi_probe(struct amba_device *dev, struct amba_id *id) { struct amba_kmi_port *kmi; struct serio *io; diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 67248c31e19a..be5bbbb8ae4e 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -210,7 +210,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) timeout = wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD1), timeout); - if (ps2dev->cmdcnt && timeout > 0) { + if (ps2dev->cmdcnt && !(ps2dev->flags & PS2_FLAG_CMD1)) { timeout = ps2_adjust_timeout(ps2dev, command, timeout); wait_event_timeout(ps2dev->wait, diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index f100c7f4c1db..6954f5500108 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -419,7 +419,7 @@ static int ucb1400_ts_remove(struct platform_device *dev) #ifdef CONFIG_PM static int ucb1400_ts_resume(struct platform_device *dev) { - struct ucb1400_ts *ucb = platform_get_drvdata(dev); + struct ucb1400_ts *ucb = dev->dev.platform_data; if (ucb->ts_task) { /* diff --git a/drivers/isdn/gigaset/isocdata.c b/drivers/isdn/gigaset/isocdata.c index b171e75cb52e..29808c4fb1cb 100644 --- a/drivers/isdn/gigaset/isocdata.c +++ b/drivers/isdn/gigaset/isocdata.c @@ -175,7 +175,7 @@ int gigaset_isowbuf_getbytes(struct isowbuf_t *iwb, int size) return -EINVAL; } src = iwb->read; - if (unlikely(limit > BAS_OUTBUFSIZE + BAS_OUTBUFPAD || + if (unlikely(limit >= BAS_OUTBUFSIZE + BAS_OUTBUFPAD || (read < src && limit >= src))) { pr_err("isoc write buffer frame reservation violated\n"); return -EFAULT; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 1a83910f674f..eaf722fe309a 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -358,6 +358,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) if (emulate_insn(cpu)) return; } + /* If KVM is active, the vmcall instruction triggers a + * General Protection Fault. Normally it triggers an + * invalid opcode fault (6): */ + case 6: + /* We need to check if ring == GUEST_PL and + * faulting instruction == vmcall. */ + if (is_hypercall(cpu)) { + rewrite_hypercall(cpu); + return; + } break; case 14: /* We've intercepted a Page Fault. */ /* The Guest accessed a virtual address that wasn't mapped. @@ -403,15 +413,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * up the pointer now to indicate a hypercall is pending. */ cpu->hcall = (struct hcall_args *)cpu->regs; return; - case 6: - /* kvm hypercalls trigger an invalid opcode fault (6). - * We need to check if ring == GUEST_PL and - * faulting instruction == vmcall. */ - if (is_hypercall(cpu)) { - rewrite_hypercall(cpu); - return; - } - break; } /* We didn't handle the trap, so it needs to go to the Guest. */ diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 47c68bc75a17..56df1cee8fb3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1097,14 +1097,12 @@ void bitmap_daemon_work(struct bitmap *bitmap) } bitmap->allclean = 1; + spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { + if (!bitmap->filemap) /* error or shutdown */ - spin_unlock_irqrestore(&bitmap->lock, flags); break; - } page = filemap_get_page(bitmap, j); @@ -1121,6 +1119,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) write_page(bitmap, page, 0); bitmap->allclean = 0; } + spin_lock_irqsave(&bitmap->lock, flags); + j |= (PAGE_BITS - 1); continue; } @@ -1181,9 +1181,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) ext2_clear_bit(file_page_offset(j), paddr); kunmap_atomic(paddr, KM_USER0); } - } - spin_unlock_irqrestore(&bitmap->lock, flags); + } else + j |= PAGE_COUNTER_MASK; } + spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ if (lastpage != NULL) { diff --git a/drivers/md/md.c b/drivers/md/md.c index fccc8343a250..641b211fe3fe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1375,6 +1375,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->dev_sectors); + sb->chunksize = cpu_to_le32(mddev->chunk_size >> 9); + sb->level = cpu_to_le32(mddev->level); + sb->layout = cpu_to_le32(mddev->layout); if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); @@ -3303,7 +3306,9 @@ static ssize_t action_show(mddev_t *mddev, char *page) { char *type = "idle"; - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) + type = "frozen"; + else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) type = "reshape"; @@ -3326,7 +3331,12 @@ action_store(mddev_t *mddev, const char *page, size_t len) if (!mddev->pers || !mddev->pers->sync_request) return -EINVAL; - if (cmd_match(page, "idle")) { + if (cmd_match(page, "frozen")) + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + else + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_unregister_thread(mddev->sync_thread); @@ -3680,7 +3690,7 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) if (strict_blocks_to_sectors(buf, §ors) < 0) return -EINVAL; if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) - return -EINVAL; + return -E2BIG; mddev->external_size = 1; } @@ -5557,7 +5567,7 @@ static struct block_device_operations md_fops = .owner = THIS_MODULE, .open = md_open, .release = md_release, - .locked_ioctl = md_ioctl, + .ioctl = md_ioctl, .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, @@ -6352,12 +6362,13 @@ void md_do_sync(mddev_t *mddev) skipped = 0; - if ((mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) || - (j - mddev->curr_resync_completed)*2 - >= mddev->resync_max - mddev->curr_resync_completed - ) { + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && + ((mddev->curr_resync > mddev->curr_resync_completed && + (mddev->curr_resync - mddev->curr_resync_completed) + > (max_sectors >> 4)) || + (j - mddev->curr_resync_completed)*2 + >= mddev->resync_max - mddev->curr_resync_completed + )) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); wait_event(mddev->recovery_wait, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4616bc3a6e71..bb37fb1b2d82 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q); static struct stripe_head * get_active_stripe(raid5_conf_t *conf, sector_t sector, - int previous, int noblock) + int previous, int noblock, int noquiesce) { struct stripe_head *sh; @@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, do { wait_event_lock_irq(conf->wait_for_stripe, - conf->quiesce == 0, + conf->quiesce == 0 || noquiesce, conf->device_lock, /* nothing */); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { @@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, &dd_idx, NULL); - sh2 = get_active_stripe(conf, s, 0, 1); + sh2 = get_active_stripe(conf, s, 0, 1, 1); if (sh2 == NULL) /* so far only the early blocks of this stripe * have been requested. When later blocks @@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh) /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1); + = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { /* sh cannot be written until sh2 has been read. * so arrange for sh to be delayed a little @@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev) int i; rcu_read_lock(); - for (i=0; i<mddev->raid_disks; i++) { + for (i = 0; i < conf->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { struct request_queue *r_queue = bdev_get_queue(rdev->bdev); @@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi) (unsigned long long)logical_sector); sh = get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK)); + (bi->bi_rw&RWA_MASK), 0); if (sh) { if (unlikely(previous)) { /* expansion might have moved on while waiting for a @@ -3811,13 +3811,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { - writepos -= reshape_sectors; + writepos -= min_t(sector_t, reshape_sectors, writepos); readpos += reshape_sectors; safepos += reshape_sectors; } else { writepos += reshape_sectors; - readpos -= reshape_sectors; - safepos -= reshape_sectors; + readpos -= min_t(sector_t, reshape_sectors, readpos); + safepos -= min_t(sector_t, reshape_sectors, safepos); } /* 'writepos' is the most advanced device address we might write. @@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; int skipped = 0; - sh = get_active_stripe(conf, stripe_addr+i, 0, 0); + sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); /* If any of this stripe is beyond the end of the old @@ -3916,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped raid5_compute_sector(conf, stripe_addr*(new_data_disks), 1, &dd_idx, NULL); last_sector = - raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512) + raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) *(new_data_disks) - 1), 1, &dd_idx, NULL); if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; while (first_sector <= last_sector) { - sh = get_active_stripe(conf, first_sector, 1, 0); + sh = get_active_stripe(conf, first_sector, 1, 0, 1); set_bit(STRIPE_EXPAND_SOURCE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); release_stripe(sh); @@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski bitmap_cond_end_sync(mddev->bitmap, sector_nr); - sh = get_active_stripe(conf, sector_nr, 0, 1); + sh = get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { - sh = get_active_stripe(conf, sector_nr, 0, 0); + sh = get_active_stripe(conf, sector_nr, 0, 0, 0); /* make sure we don't swamp the stripe cache if someone else * is trying to get access */ @@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski * We don't need to check the 'failed' flag as when that gets set, * recovery aborts. */ - for (i=0; i<mddev->raid_disks; i++) + for (i = 0; i < conf->raid_disks; i++) if (conf->disks[i].rdev == NULL) still_degraded = 1; @@ -4086,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) /* already done this stripe */ continue; - sh = get_active_stripe(conf, sector, 0, 1); + sh = get_active_stripe(conf, sector, 0, 1, 0); if (!sh) { /* failed to get a stripe - must wait */ diff --git a/drivers/media/video/ivtv/ivtv-queue.c b/drivers/media/video/ivtv/ivtv-queue.c index ff7b7deded4f..7fde36e6d227 100644 --- a/drivers/media/video/ivtv/ivtv-queue.c +++ b/drivers/media/video/ivtv/ivtv-queue.c @@ -230,7 +230,8 @@ int ivtv_stream_alloc(struct ivtv_stream *s) return -ENOMEM; } if (ivtv_might_use_dma(s)) { - s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, sizeof(struct ivtv_sg_element), s->dma); + s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, + sizeof(struct ivtv_sg_element), PCI_DMA_TODEVICE); ivtv_stream_sync_for_cpu(s); } diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index 7793932a513b..11a6248cc1c1 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -443,7 +443,7 @@ static irqreturn_t pcf50633_irq(int irq, void *data) dev_dbg(pcf->dev, "pcf50633_irq\n"); get_device(pcf->dev); - disable_irq(pcf->irq); + disable_irq_nosync(pcf->irq); schedule_work(&pcf->irq_work); return IRQ_HANDLED; diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index c2be3088e2e1..fe24079387c5 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -79,10 +79,6 @@ static int wm8350_phys_read(struct wm8350 *wm8350, u8 reg, int num_regs, /* Cache is CPU endian */ dest[i - reg] = be16_to_cpu(dest[i - reg]); - /* Satisfy non-volatile bits from cache */ - dest[i - reg] &= wm8350_reg_io_map[i].vol; - dest[i - reg] |= wm8350->reg_cache[i]; - /* Mask out non-readable bits */ dest[i - reg] &= wm8350_reg_io_map[i].readable; } @@ -182,9 +178,6 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src) (wm8350->reg_cache[i] & ~wm8350_reg_io_map[i].writable) | src[i - reg]; - /* Don't store volatile bits */ - wm8350->reg_cache[i] &= ~wm8350_reg_io_map[i].vol; - src[i - reg] = cpu_to_be16(src[i - reg]); } @@ -1261,7 +1254,6 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode) (i < WM8350_CLOCK_CONTROL_1 || i > WM8350_AIF_TEST)) { value = be16_to_cpu(wm8350->reg_cache[i]); value &= wm8350_reg_io_map[i].readable; - value &= ~wm8350_reg_io_map[i].vol; wm8350->reg_cache[i] = value; } else wm8350->reg_cache[i] = reg_map[i]; diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 3cf61ece71d7..348443bdb23b 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -119,7 +119,7 @@ enclosure_register(struct device *dev, const char *name, int components, edev->edev.class = &enclosure_class; edev->edev.parent = get_device(dev); edev->cb = cb; - dev_set_name(&edev->edev, name); + dev_set_name(&edev->edev, "%s", name); err = device_register(&edev->edev); if (err) goto err; @@ -255,8 +255,8 @@ enclosure_component_register(struct enclosure_device *edev, ecomp->number = number; cdev = &ecomp->cdev; cdev->parent = get_device(&edev->edev); - if (name) - dev_set_name(cdev, name); + if (name && name[0]) + dev_set_name(cdev, "%s", name); else dev_set_name(cdev, "%u", number); diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 36875dcfa492..7d4febdab286 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -490,7 +490,7 @@ static void mmci_check_status(unsigned long data) mod_timer(&host->timer, jiffies + HZ); } -static int __devinit mmci_probe(struct amba_device *dev, void *id) +static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) { struct mmc_platform_data *plat = dev->dev.platform_data; struct mmci_host *host; diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c index c643d0fe118f..b56d72ff06e9 100644 --- a/drivers/mmc/host/mvsdio.c +++ b/drivers/mmc/host/mvsdio.c @@ -64,6 +64,31 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data) unsigned int tmout; int tmout_index; + /* + * Hardware weirdness. The FIFO_EMPTY bit of the HW_STATE + * register is sometimes not set before a while when some + * "unusual" data block sizes are used (such as with the SWITCH + * command), even despite the fact that the XFER_DONE interrupt + * was raised. And if another data transfer starts before + * this bit comes to good sense (which eventually happens by + * itself) then the new transfer simply fails with a timeout. + */ + if (!(mvsd_read(MVSD_HW_STATE) & (1 << 13))) { + unsigned long t = jiffies + HZ; + unsigned int hw_state, count = 0; + do { + if (time_after(jiffies, t)) { + dev_warn(host->dev, "FIFO_EMPTY bit missing\n"); + break; + } + hw_state = mvsd_read(MVSD_HW_STATE); + count++; + } while (!(hw_state & (1 << 13))); + dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit " + "(hw=0x%04x, count=%d, jiffies=%ld)\n", + hw_state, count, jiffies - (t - HZ)); + } + /* If timeout=0 then maximum timeout index is used. */ tmout = DIV_ROUND_UP(data->timeout_ns, host->ns_per_clk); tmout += data->timeout_clks; @@ -620,9 +645,18 @@ static void mvsd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (ios->bus_width == MMC_BUS_WIDTH_4) ctrl_reg |= MVSD_HOST_CTRL_DATA_WIDTH_4_BITS; + /* + * The HI_SPEED_EN bit is causing trouble with many (but not all) + * high speed SD, SDHC and SDIO cards. Not enabling that bit + * makes all cards work. So let's just ignore that bit for now + * and revisit this issue if problems for not enabling this bit + * are ever reported. + */ +#if 0 if (ios->timing == MMC_TIMING_MMC_HS || ios->timing == MMC_TIMING_SD_HS) ctrl_reg |= MVSD_HOST_CTRL_HI_SPEED_EN; +#endif host->ctrl = ctrl_reg; mvsd_write(MVSD_HOST_CTRL, ctrl_reg); @@ -882,3 +916,4 @@ module_param(nodma, int, 0); MODULE_AUTHOR("Maen Suleiman, Nicolas Pitre"); MODULE_DESCRIPTION("Marvell MMC,SD,SDIO Host Controller driver"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:mvsdio"); diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index b4a615c55f28..f4cbe473670e 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -140,6 +140,8 @@ struct mxcmci_host { struct work_struct datawork; }; +static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios); + static inline int mxcmci_use_dma(struct mxcmci_host *host) { return host->do_dma; @@ -160,7 +162,7 @@ static void mxcmci_softreset(struct mxcmci_host *host) writew(0xff, host->base + MMC_REG_RES_TO); } -static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data) +static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data) { unsigned int nob = data->blocks; unsigned int blksz = data->blksz; @@ -168,6 +170,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data) #ifdef HAS_DMA struct scatterlist *sg; int i; + int ret; #endif if (data->flags & MMC_DATA_STREAM) nob = 0xffff; @@ -183,7 +186,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data) for_each_sg(data->sg, sg, data->sg_len, i) { if (sg->offset & 3 || sg->length & 3) { host->do_dma = 0; - return; + return 0; } } @@ -192,23 +195,30 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data) host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, host->dma_dir); - imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize, - host->res->start + MMC_REG_BUFFER_ACCESS, - DMA_MODE_READ); + ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, + datasize, + host->res->start + MMC_REG_BUFFER_ACCESS, + DMA_MODE_READ); } else { host->dma_dir = DMA_TO_DEVICE; host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, host->dma_dir); - imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize, - host->res->start + MMC_REG_BUFFER_ACCESS, - DMA_MODE_WRITE); + ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, + datasize, + host->res->start + MMC_REG_BUFFER_ACCESS, + DMA_MODE_WRITE); } + if (ret) { + dev_err(mmc_dev(host->mmc), "failed to setup DMA : %d\n", ret); + return ret; + } wmb(); imx_dma_enable(host->dma); #endif /* HAS_DMA */ + return 0; } static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd, @@ -345,8 +355,11 @@ static int mxcmci_poll_status(struct mxcmci_host *host, u32 mask) stat = readl(host->base + MMC_REG_STATUS); if (stat & STATUS_ERR_MASK) return stat; - if (time_after(jiffies, timeout)) + if (time_after(jiffies, timeout)) { + mxcmci_softreset(host); + mxcmci_set_clk_rate(host, host->clock); return STATUS_TIME_OUT_READ; + } if (stat & mask) return 0; cpu_relax(); @@ -531,6 +544,7 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req) { struct mxcmci_host *host = mmc_priv(mmc); unsigned int cmdat = host->cmdat; + int error; WARN_ON(host->req != NULL); @@ -540,7 +554,12 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req) host->do_dma = 1; #endif if (req->data) { - mxcmci_setup_data(host, req->data); + error = mxcmci_setup_data(host, req->data); + if (error) { + req->cmd->error = error; + goto out; + } + cmdat |= CMD_DAT_CONT_DATA_ENABLE; @@ -548,7 +567,9 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req) cmdat |= CMD_DAT_CONT_WRITE; } - if (mxcmci_start_cmd(host, req->cmd, cmdat)) + error = mxcmci_start_cmd(host, req->cmd, cmdat); +out: + if (error) mxcmci_finish_request(host, req); } @@ -724,7 +745,9 @@ static int mxcmci_probe(struct platform_device *pdev) goto out_clk_put; } - mmc->f_min = clk_get_rate(host->clk) >> 7; + mmc->f_min = clk_get_rate(host->clk) >> 16; + if (mmc->f_min < 400000) + mmc->f_min = 400000; mmc->f_max = clk_get_rate(host->clk) >> 1; /* recommended in data sheet */ diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index bfa25c01c872..dceb5ee3bda0 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -822,7 +822,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id) del_timer(&host->cmd_abort_timer); host->abort = 1; OMAP_MMC_WRITE(host, IE, 0); - disable_irq(host->irq); + disable_irq_nosync(host->irq); schedule_work(&host->cmd_abort_work); return IRQ_HANDLED; } diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index e62a22a7f00c..c40cb96255a2 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -680,7 +680,7 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data) host->dma_ch = -1; /* * DMA Callback: run in interrupt context. - * mutex_unlock will through a kernel warning if used. + * mutex_unlock will throw a kernel warning if used. */ up(&host->sem); } diff --git a/drivers/mmc/host/sdhci-of.c b/drivers/mmc/host/sdhci-of.c index 3ff4ac3abe8b..128c614d11aa 100644 --- a/drivers/mmc/host/sdhci-of.c +++ b/drivers/mmc/host/sdhci-of.c @@ -55,7 +55,13 @@ static u32 esdhc_readl(struct sdhci_host *host, int reg) static u16 esdhc_readw(struct sdhci_host *host, int reg) { - return in_be16(host->ioaddr + (reg ^ 0x2)); + u16 ret; + + if (unlikely(reg == SDHCI_HOST_VERSION)) + ret = in_be16(host->ioaddr + reg); + else + ret = in_be16(host->ioaddr + (reg ^ 0x2)); + return ret; } static u8 esdhc_readb(struct sdhci_host *host, int reg) @@ -277,6 +283,7 @@ static int __devexit sdhci_of_remove(struct of_device *ofdev) static const struct of_device_id sdhci_of_match[] = { { .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, }, { .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, }, + { .compatible = "fsl,esdhc", .data = &sdhci_esdhc, }, { .compatible = "generic-sdhci", }, {}, }; diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 62dee54af0a5..43976aa4dbb1 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -178,7 +178,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr) /* Calculate flash page address; use block erase (for speed) if * we're at a block boundary and need to erase the whole block. */ - pageaddr = div_u64(instr->len, priv->page_size); + pageaddr = div_u64(instr->addr, priv->page_size); do_block = (pageaddr & 0x7) == 0 && instr->len >= blocksize; pageaddr = pageaddr << priv->page_offset; diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index 0119220de7d0..02700f769b8a 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -407,16 +407,17 @@ static int __init nand_davinci_probe(struct platform_device *pdev) } info->chip.ecc.mode = ecc_mode; - info->clk = clk_get(&pdev->dev, "AEMIFCLK"); + info->clk = clk_get(&pdev->dev, "aemif"); if (IS_ERR(info->clk)) { ret = PTR_ERR(info->clk); - dev_dbg(&pdev->dev, "unable to get AEMIFCLK, err %d\n", ret); + dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret); goto err_clk; } ret = clk_enable(info->clk); if (ret < 0) { - dev_dbg(&pdev->dev, "unable to enable AEMIFCLK, err %d\n", ret); + dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n", + ret); goto err_clk_enable; } diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index f3548d048014..40c26080ecda 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -831,6 +831,7 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command, break; case NAND_CMD_READID: + host->col_addr = 0; send_read_id(host); break; @@ -867,6 +868,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) mtd->priv = this; mtd->owner = THIS_MODULE; mtd->dev.parent = &pdev->dev; + mtd->name = "mxc_nand"; /* 50 us command delay time */ this->chip_delay = 5; @@ -882,8 +884,10 @@ static int __init mxcnd_probe(struct platform_device *pdev) this->verify_buf = mxc_nand_verify_buf; host->clk = clk_get(&pdev->dev, "nfc"); - if (IS_ERR(host->clk)) + if (IS_ERR(host->clk)) { + err = PTR_ERR(host->clk); goto eclk; + } clk_enable(host->clk); host->clk_act = 1; @@ -896,7 +900,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) host->regs = ioremap(res->start, res->end - res->start + 1); if (!host->regs) { - err = -EIO; + err = -ENOMEM; goto eres; } @@ -1011,30 +1015,35 @@ static int __devexit mxcnd_remove(struct platform_device *pdev) #ifdef CONFIG_PM static int mxcnd_suspend(struct platform_device *pdev, pm_message_t state) { - struct mtd_info *info = platform_get_drvdata(pdev); + struct mtd_info *mtd = platform_get_drvdata(pdev); + struct nand_chip *nand_chip = mtd->priv; + struct mxc_nand_host *host = nand_chip->priv; int ret = 0; DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND suspend\n"); - if (info) - ret = info->suspend(info); - - /* Disable the NFC clock */ - clk_disable(nfc_clk); /* FIXME */ + if (mtd) { + ret = mtd->suspend(mtd); + /* Disable the NFC clock */ + clk_disable(host->clk); + } return ret; } static int mxcnd_resume(struct platform_device *pdev) { - struct mtd_info *info = platform_get_drvdata(pdev); + struct mtd_info *mtd = platform_get_drvdata(pdev); + struct nand_chip *nand_chip = mtd->priv; + struct mxc_nand_host *host = nand_chip->priv; int ret = 0; DEBUG(MTD_DEBUG_LEVEL0, "MXC_ND : NAND resume\n"); - /* Enable the NFC clock */ - clk_enable(nfc_clk); /* FIXME */ - if (info) - info->resume(info); + if (mtd) { + /* Enable the NFC clock */ + clk_enable(host->clk); + mtd->resume(mtd); + } return ret; } @@ -1055,13 +1064,7 @@ static struct platform_driver mxcnd_driver = { static int __init mxc_nd_init(void) { - /* Register the device driver structure. */ - pr_info("MXC MTD nand Driver\n"); - if (platform_driver_probe(&mxcnd_driver, mxcnd_probe) != 0) { - printk(KERN_ERR "Driver register failed for mxcnd_driver\n"); - return -ENODEV; - } - return 0; + return platform_driver_probe(&mxcnd_driver, mxcnd_probe); } static void __exit mxc_nd_cleanup(void) diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index fbb371921991..682aad897081 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -480,9 +480,13 @@ static int pnp_registered; #ifdef CONFIG_EISA static struct eisa_device_id el3_eisa_ids[] = { + { "TCM5090" }, + { "TCM5091" }, { "TCM5092" }, { "TCM5093" }, + { "TCM5094" }, { "TCM5095" }, + { "TCM5098" }, { "" } }; MODULE_DEVICE_TABLE(eisa, el3_eisa_ids); diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 1fc4602a6ff2..a1c25cb4669f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -102,7 +102,7 @@ obj-$(CONFIG_HAMACHI) += hamachi.o obj-$(CONFIG_NET) += Space.o loopback.o obj-$(CONFIG_SEEQ8005) += seeq8005.o obj-$(CONFIG_NET_SB1000) += sb1000.o -obj-$(CONFIG_MAC8390) += mac8390.o 8390.o +obj-$(CONFIG_MAC8390) += mac8390.o obj-$(CONFIG_APNE) += apne.o 8390.o obj-$(CONFIG_PCMCIA_PCNET) += 8390.o obj-$(CONFIG_HP100) += hp100.o diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index fb57b750866b..1342418fb209 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -37,6 +37,7 @@ char atl1e_driver_version[] = DRV_VERSION; */ static struct pci_device_id atl1e_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, PCI_DEVICE_ID_ATTANSIC_L1E)}, + {PCI_DEVICE(PCI_VENDOR_ID_ATTANSIC, 0x1066)}, /* required last entry */ { 0 } }; diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 0ab22540bf59..4e817126e280 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -82,6 +82,12 @@ #include "atl1.h" +#define ATLX_DRIVER_VERSION "2.1.3" +MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \ + Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(ATLX_DRIVER_VERSION); + /* Temporary hack for merging atl1 and atl2 */ #include "atlx.c" diff --git a/drivers/net/atlx/atlx.h b/drivers/net/atlx/atlx.h index 297a03da6b7f..14054b75aa62 100644 --- a/drivers/net/atlx/atlx.h +++ b/drivers/net/atlx/atlx.h @@ -29,12 +29,6 @@ #include <linux/module.h> #include <linux/types.h> -#define ATLX_DRIVER_VERSION "2.1.3" -MODULE_AUTHOR("Xiong Huang <xiong.huang@atheros.com>, \ - Chris Snook <csnook@redhat.com>, Jay Cliburn <jcliburn@gmail.com>"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(ATLX_DRIVER_VERSION); - #define ATLX_ERR_PHY 2 #define ATLX_ERR_PHY_SPEED 7 #define ATLX_ERR_PHY_RES 8 diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h index c49ddd08b2aa..b4bb06fdf307 100644 --- a/drivers/net/benet/be.h +++ b/drivers/net/benet/be.h @@ -35,8 +35,22 @@ #define DRV_VER "2.0.348" #define DRV_NAME "be2net" #define BE_NAME "ServerEngines BladeEngine2 10Gbps NIC" +#define OC_NAME "Emulex OneConnect 10Gbps NIC" #define DRV_DESC BE_NAME "Driver" +#define BE_VENDOR_ID 0x19a2 +#define BE_DEVICE_ID1 0x211 +#define OC_DEVICE_ID1 0x700 +#define OC_DEVICE_ID2 0x701 + +static inline char *nic_name(struct pci_dev *pdev) +{ + if (pdev->device == OC_DEVICE_ID1 || pdev->device == OC_DEVICE_ID2) + return OC_NAME; + else + return BE_NAME; +} + /* Number of bytes of an RX frame that are copied to skb->data */ #define BE_HDR_LEN 64 #define BE_MAX_JUMBO_FRAME_SIZE 9018 diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 30d0c81c989e..5c378b5e8e41 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -28,10 +28,10 @@ static unsigned int rx_frag_size = 2048; module_param(rx_frag_size, uint, S_IRUGO); MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); -#define BE_VENDOR_ID 0x19a2 -#define BE2_DEVICE_ID_1 0x0211 static DEFINE_PCI_DEVICE_TABLE(be_dev_ids) = { - { PCI_DEVICE(BE_VENDOR_ID, BE2_DEVICE_ID_1) }, + { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, + { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) }, + { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) }, { 0 } }; MODULE_DEVICE_TABLE(pci, be_dev_ids); @@ -1859,7 +1859,7 @@ static int __devinit be_probe(struct pci_dev *pdev, if (status != 0) goto stats_clean; - dev_info(&pdev->dev, BE_NAME " port %d\n", adapter->port_num); + dev_info(&pdev->dev, "%s port %d\n", nic_name(pdev), adapter->port_num); return 0; stats_clean: @@ -1873,7 +1873,7 @@ rel_reg: disable_dev: pci_disable_device(pdev); do_none: - dev_warn(&pdev->dev, BE_NAME " initialization failed\n"); + dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev)); return status; } diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 9f971ed6b58d..b4da18213324 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -979,22 +979,7 @@ static int bfin_mac_open(struct net_device *dev) return 0; } -static const struct net_device_ops bfin_mac_netdev_ops = { - .ndo_open = bfin_mac_open, - .ndo_stop = bfin_mac_close, - .ndo_start_xmit = bfin_mac_hard_start_xmit, - .ndo_set_mac_address = bfin_mac_set_mac_address, - .ndo_tx_timeout = bfin_mac_timeout, - .ndo_set_multicast_list = bfin_mac_set_multicast_list, - .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = eth_change_mtu, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = bfin_mac_poll, -#endif -}; - /* - * * this makes the board clean up everything that it can * and not talk to the outside world. Caused by * an 'ifconfig ethX down' @@ -1019,6 +1004,20 @@ static int bfin_mac_close(struct net_device *dev) return 0; } +static const struct net_device_ops bfin_mac_netdev_ops = { + .ndo_open = bfin_mac_open, + .ndo_stop = bfin_mac_close, + .ndo_start_xmit = bfin_mac_hard_start_xmit, + .ndo_set_mac_address = bfin_mac_set_mac_address, + .ndo_tx_timeout = bfin_mac_timeout, + .ndo_set_multicast_list = bfin_mac_set_multicast_list, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = eth_change_mtu, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bfin_mac_poll, +#endif +}; + static int __devinit bfin_mac_probe(struct platform_device *pdev) { struct net_device *ndev; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 8c2e5ab51f08..faf094abef7f 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1465,6 +1465,12 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, return best; } +static int agg_device_up(const struct aggregator *agg) +{ + return (netif_running(agg->slave->dev) && + netif_carrier_ok(agg->slave->dev)); +} + /** * ad_agg_selection_logic - select an aggregation group for a team * @aggregator: the aggregator we're looking at @@ -1496,14 +1502,13 @@ static void ad_agg_selection_logic(struct aggregator *agg) struct port *port; origin = agg; - active = __get_active_agg(agg); - best = active; + best = (active && agg_device_up(active)) ? active : NULL; do { agg->is_active = 0; - if (agg->num_of_ports) + if (agg->num_of_ports && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } while ((agg = __get_next_agg(agg))); diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 714df2b675e6..c888e97c9671 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -85,8 +85,8 @@ struct fl_pg_chunk { struct page *page; void *va; unsigned int offset; - u64 *p_cnt; - DECLARE_PCI_UNMAP_ADDR(mapping); + unsigned long *p_cnt; + dma_addr_t mapping; }; struct rx_desc; diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 7ea48414c6cb..17858b9a5830 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -2496,14 +2496,16 @@ static void check_link_status(struct adapter *adapter) for_each_port(adapter, i) { struct net_device *dev = adapter->port[i]; struct port_info *p = netdev_priv(dev); + int link_fault; spin_lock_irq(&adapter->work_lock); - if (p->link_fault) { + link_fault = p->link_fault; + spin_unlock_irq(&adapter->work_lock); + + if (link_fault) { t3_link_fault(adapter, i); - spin_unlock_irq(&adapter->work_lock); continue; } - spin_unlock_irq(&adapter->work_lock); if (!(p->phy.caps & SUPPORTED_IRQ) && netif_running(dev)) { t3_xgm_intr_disable(adapter, i); diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 26d3587f3399..b3ee2bc1a005 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -355,7 +355,7 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q, (*d->pg_chunk.p_cnt)--; if (!*d->pg_chunk.p_cnt) pci_unmap_page(pdev, - pci_unmap_addr(&d->pg_chunk, mapping), + d->pg_chunk.mapping, q->alloc_size, PCI_DMA_FROMDEVICE); put_page(d->pg_chunk.page); @@ -454,7 +454,7 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, q->pg_chunk.offset = 0; mapping = pci_map_page(adapter->pdev, q->pg_chunk.page, 0, q->alloc_size, PCI_DMA_FROMDEVICE); - pci_unmap_addr_set(&q->pg_chunk, mapping, mapping); + q->pg_chunk.mapping = mapping; } sd->pg_chunk = q->pg_chunk; @@ -511,8 +511,7 @@ static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) nomem: q->alloc_failed++; break; } - mapping = pci_unmap_addr(&sd->pg_chunk, mapping) + - sd->pg_chunk.offset; + mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset; pci_unmap_addr_set(sd, dma_addr, mapping); add_one_rx_chunk(mapping, d, q->gen); @@ -881,7 +880,7 @@ recycle: (*sd->pg_chunk.p_cnt)--; if (!*sd->pg_chunk.p_cnt) pci_unmap_page(adap->pdev, - pci_unmap_addr(&sd->pg_chunk, mapping), + sd->pg_chunk.mapping, fl->alloc_size, PCI_DMA_FROMDEVICE); if (!skb) { @@ -2096,7 +2095,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, (*sd->pg_chunk.p_cnt)--; if (!*sd->pg_chunk.p_cnt) pci_unmap_page(adap->pdev, - pci_unmap_addr(&sd->pg_chunk, mapping), + sd->pg_chunk.mapping, fl->alloc_size, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c index 4f68aeb2679a..4950d5d789ae 100644 --- a/drivers/net/cxgb3/t3_hw.c +++ b/drivers/net/cxgb3/t3_hw.c @@ -1274,6 +1274,11 @@ void t3_link_fault(struct adapter *adapter, int port_id) A_XGM_INT_STATUS + mac->offset); link_fault &= F_LINKFAULTCHANGE; + link_ok = lc->link_ok; + speed = lc->speed; + duplex = lc->duplex; + fc = lc->fc; + phy->ops->get_link_status(phy, &link_ok, &speed, &duplex, &fc); if (link_fault) { diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index b1419e21b46b..fffb006b7d95 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -4027,8 +4027,9 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, PCI_DMA_FROMDEVICE); length = le16_to_cpu(rx_desc->length); - - if (unlikely(!(status & E1000_RXD_STAT_EOP))) { + /* !EOP means multiple descriptors were used to store a single + * packet, also make sure the frame isn't just CRC only */ + if (unlikely(!(status & E1000_RXD_STAT_EOP) || (length <= 4))) { /* All receives must fit into a single buffer */ E1000_DBG("%s: Receive packet consumed multiple" " buffers\n", netdev->name); diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index f9a846b1b92f..9f6a68fb7b45 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -897,6 +897,12 @@ enum { }; static int phy_cross = NV_CROSSOVER_DETECTION_DISABLED; +/* + * Power down phy when interface is down (persists through reboot; + * older Linux and other OSes may not power it up again) + */ +static int phy_power_down = 0; + static inline struct fe_priv *get_nvpriv(struct net_device *dev) { return netdev_priv(dev); @@ -1485,7 +1491,10 @@ static int phy_init(struct net_device *dev) /* restart auto negotiation, power down phy */ mii_control = mii_rw(dev, np->phyaddr, MII_BMCR, MII_READ); - mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE | BMCR_PDOWN); + mii_control |= (BMCR_ANRESTART | BMCR_ANENABLE); + if (phy_power_down) { + mii_control |= BMCR_PDOWN; + } if (mii_rw(dev, np->phyaddr, MII_BMCR, mii_control)) { return PHY_ERROR; } @@ -5513,7 +5522,7 @@ static int nv_close(struct net_device *dev) nv_drain_rxtx(dev); - if (np->wolenabled) { + if (np->wolenabled || !phy_power_down) { writel(NVREG_PFF_ALWAYS|NVREG_PFF_MYADDR, base + NvRegPacketFilterFlags); nv_start_rx(dev); } else { @@ -6367,6 +6376,8 @@ module_param(dma_64bit, int, 0); MODULE_PARM_DESC(dma_64bit, "High DMA is enabled by setting to 1 and disabled by setting to 0."); module_param(phy_cross, int, 0); MODULE_PARM_DESC(phy_cross, "Phy crossover detection for Realtek 8201 phy is enabled by setting to 1 and disabled by setting to 0."); +module_param(phy_power_down, int, 0); +MODULE_PARM_DESC(phy_power_down, "Power down phy and disable link when interface is down (1), or leave phy powered up (0)."); MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>"); MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver"); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index b2c49679bba7..a0519184e54e 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -1885,8 +1885,17 @@ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit) if (unlikely(!newskb)) newskb = skb; - else if (skb) + else if (skb) { + /* + * We need to reset ->data to what it + * was before gfar_new_skb() re-aligned + * it to an RXBUF_ALIGNMENT boundary + * before we put the skb back on the + * recycle list. + */ + skb->data = skb->head + NET_SKB_PAD; __skb_queue_head(&priv->rx_recycle, skb); + } } else { /* Increment the number of packets */ dev->stats.rx_packets++; diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index 0642d52aef5c..cf352961ae9b 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -259,7 +259,7 @@ extern const char gfar_driver_version[]; (IEVENT_RXC | IEVENT_BSY | IEVENT_EBERR | IEVENT_MSRO | \ IEVENT_BABT | IEVENT_TXC | IEVENT_TXE | IEVENT_LC \ | IEVENT_CRL | IEVENT_XFUN | IEVENT_DPE | IEVENT_PERR \ - | IEVENT_MAG) + | IEVENT_MAG | IEVENT_BABR) #define IMASK_INIT_CLEAR 0x00000000 #define IMASK_BABR 0x80000000 diff --git a/drivers/net/mac8390.c b/drivers/net/mac8390.c index 8e884869a05b..22e74a0e0361 100644 --- a/drivers/net/mac8390.c +++ b/drivers/net/mac8390.c @@ -304,7 +304,7 @@ struct net_device * __init mac8390_probe(int unit) if (!MACH_IS_MAC) return ERR_PTR(-ENODEV); - dev = alloc_ei_netdev(); + dev = ____alloc_ei_netdev(0); if (!dev) return ERR_PTR(-ENOMEM); @@ -481,15 +481,15 @@ void cleanup_module(void) static const struct net_device_ops mac8390_netdev_ops = { .ndo_open = mac8390_open, .ndo_stop = mac8390_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; diff --git a/drivers/net/meth.c b/drivers/net/meth.c index aa08987f6e81..dbd3436912b8 100644 --- a/drivers/net/meth.c +++ b/drivers/net/meth.c @@ -127,11 +127,11 @@ static unsigned long mdio_read(struct meth_private *priv, unsigned long phyreg) static int mdio_probe(struct meth_private *priv) { int i; - unsigned long p2, p3; + unsigned long p2, p3, flags; /* check if phy is detected already */ if(priv->phy_addr>=0&&priv->phy_addr<32) return 0; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); for (i=0;i<32;++i){ priv->phy_addr=i; p2=mdio_read(priv,2); @@ -157,7 +157,7 @@ static int mdio_probe(struct meth_private *priv) break; } } - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); if(priv->phy_addr<32) { return 0; } @@ -373,14 +373,14 @@ static int meth_release(struct net_device *dev) static void meth_rx(struct net_device* dev, unsigned long int_status) { struct sk_buff *skb; - unsigned long status; + unsigned long status, flags; struct meth_private *priv = netdev_priv(dev); unsigned long fifo_rptr = (int_status & METH_INT_RX_RPTR_MASK) >> 8; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); priv->dma_ctrl &= ~METH_DMA_RX_INT_EN; mace->eth.dma_ctrl = priv->dma_ctrl; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); if (int_status & METH_INT_RX_UNDERFLOW) { fifo_rptr = (fifo_rptr - 1) & 0x0f; @@ -452,12 +452,12 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) mace->eth.rx_fifo = priv->rx_ring_dmas[priv->rx_write]; ADVANCE_RX_PTR(priv->rx_write); } - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); /* In case there was underflow, and Rx DMA was disabled */ priv->dma_ctrl |= METH_DMA_RX_INT_EN | METH_DMA_RX_EN; mace->eth.dma_ctrl = priv->dma_ctrl; mace->eth.int_stat = METH_INT_RX_THRESHOLD; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } static int meth_tx_full(struct net_device *dev) @@ -470,11 +470,11 @@ static int meth_tx_full(struct net_device *dev) static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) { struct meth_private *priv = netdev_priv(dev); - unsigned long status; + unsigned long status, flags; struct sk_buff *skb; unsigned long rptr = (int_status&TX_INFO_RPTR) >> 16; - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); /* Stop DMA notification */ priv->dma_ctrl &= ~(METH_DMA_TX_INT_EN); @@ -527,12 +527,13 @@ static void meth_tx_cleanup(struct net_device* dev, unsigned long int_status) } mace->eth.int_stat = METH_INT_TX_EMPTY | METH_INT_TX_PKT; - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } static void meth_error(struct net_device* dev, unsigned status) { struct meth_private *priv = netdev_priv(dev); + unsigned long flags; printk(KERN_WARNING "meth: error status: 0x%08x\n",status); /* check for errors too... */ @@ -547,7 +548,7 @@ static void meth_error(struct net_device* dev, unsigned status) printk(KERN_WARNING "meth: Rx overflow\n"); if (status & (METH_INT_RX_UNDERFLOW)) { printk(KERN_WARNING "meth: Rx underflow\n"); - spin_lock(&priv->meth_lock); + spin_lock_irqsave(&priv->meth_lock, flags); mace->eth.int_stat = METH_INT_RX_UNDERFLOW; /* more underflow interrupts will be delivered, * effectively throwing us into an infinite loop. @@ -555,7 +556,7 @@ static void meth_error(struct net_device* dev, unsigned status) priv->dma_ctrl &= ~METH_DMA_RX_EN; mace->eth.dma_ctrl = priv->dma_ctrl; DPRINTK("Disabled meth Rx DMA temporarily\n"); - spin_unlock(&priv->meth_lock); + spin_unlock_irqrestore(&priv->meth_lock, flags); } mace->eth.int_stat = METH_INT_ERROR; } diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c index 91f50de84be9..a276125b709b 100644 --- a/drivers/net/mlx4/en_cq.c +++ b/drivers/net/mlx4/en_cq.c @@ -125,8 +125,10 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) if (cq->is_tx) del_timer(&cq->timer); - else + else { napi_disable(&cq->napi); + netif_napi_del(&cq->napi); + } mlx4_cq_free(mdev->dev, &cq->mcq); } diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index ac6fc499b280..e5c98a98ad37 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -426,7 +426,7 @@ void mlx4_en_poll_tx_cq(unsigned long data) INC_PERF_COUNTER(priv->pstats.tx_poll); - if (!spin_trylock(&ring->comp_lock)) { + if (!spin_trylock_irq(&ring->comp_lock)) { mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); return; } @@ -439,7 +439,7 @@ void mlx4_en_poll_tx_cq(unsigned long data) if (inflight && priv->port_up) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); - spin_unlock(&ring->comp_lock); + spin_unlock_irq(&ring->comp_lock); } static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, @@ -482,9 +482,9 @@ static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) - if (spin_trylock(&ring->comp_lock)) { + if (spin_trylock_irq(&ring->comp_lock)) { mlx4_en_process_tx_cq(priv->dev, cq); - spin_unlock(&ring->comp_lock); + spin_unlock_irq(&ring->comp_lock); } } diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index a400d7115f78..6bb5af35eda6 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -569,7 +569,7 @@ static int rxq_process(struct rx_queue *rxq, int budget) if (rxq->rx_curr_desc == rxq->rx_ring_size) rxq->rx_curr_desc = 0; - dma_unmap_single(NULL, rx_desc->buf_ptr, + dma_unmap_single(mp->dev->dev.parent, rx_desc->buf_ptr, rx_desc->buf_size, DMA_FROM_DEVICE); rxq->rx_desc_count--; rx++; @@ -678,8 +678,9 @@ static int rxq_refill(struct rx_queue *rxq, int budget) rx_desc = rxq->rx_desc_area + rx; - rx_desc->buf_ptr = dma_map_single(NULL, skb->data, - mp->skb_size, DMA_FROM_DEVICE); + rx_desc->buf_ptr = dma_map_single(mp->dev->dev.parent, + skb->data, mp->skb_size, + DMA_FROM_DEVICE); rx_desc->buf_size = mp->skb_size; rxq->rx_skb[rx] = skb; wmb(); @@ -718,6 +719,7 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb) static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb) { + struct mv643xx_eth_private *mp = txq_to_mp(txq); int nr_frags = skb_shinfo(skb)->nr_frags; int frag; @@ -746,10 +748,10 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb) desc->l4i_chk = 0; desc->byte_cnt = this_frag->size; - desc->buf_ptr = dma_map_page(NULL, this_frag->page, - this_frag->page_offset, - this_frag->size, - DMA_TO_DEVICE); + desc->buf_ptr = dma_map_page(mp->dev->dev.parent, + this_frag->page, + this_frag->page_offset, + this_frag->size, DMA_TO_DEVICE); } } @@ -826,7 +828,8 @@ no_csum: desc->l4i_chk = l4i_chk; desc->byte_cnt = length; - desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); + desc->buf_ptr = dma_map_single(mp->dev->dev.parent, skb->data, + length, DMA_TO_DEVICE); __skb_queue_tail(&txq->tx_skb, skb); @@ -956,10 +959,10 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) } if (cmd_sts & TX_FIRST_DESC) { - dma_unmap_single(NULL, desc->buf_ptr, + dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr, desc->byte_cnt, DMA_TO_DEVICE); } else { - dma_unmap_page(NULL, desc->buf_ptr, + dma_unmap_page(mp->dev->dev.parent, desc->buf_ptr, desc->byte_cnt, DMA_TO_DEVICE); } @@ -1894,9 +1897,9 @@ static int rxq_init(struct mv643xx_eth_private *mp, int index) mp->rx_desc_sram_size); rxq->rx_desc_dma = mp->rx_desc_sram_addr; } else { - rxq->rx_desc_area = dma_alloc_coherent(NULL, size, - &rxq->rx_desc_dma, - GFP_KERNEL); + rxq->rx_desc_area = dma_alloc_coherent(mp->dev->dev.parent, + size, &rxq->rx_desc_dma, + GFP_KERNEL); } if (rxq->rx_desc_area == NULL) { @@ -1947,7 +1950,7 @@ out_free: if (index == 0 && size <= mp->rx_desc_sram_size) iounmap(rxq->rx_desc_area); else - dma_free_coherent(NULL, size, + dma_free_coherent(mp->dev->dev.parent, size, rxq->rx_desc_area, rxq->rx_desc_dma); @@ -1979,7 +1982,7 @@ static void rxq_deinit(struct rx_queue *rxq) rxq->rx_desc_area_size <= mp->rx_desc_sram_size) iounmap(rxq->rx_desc_area); else - dma_free_coherent(NULL, rxq->rx_desc_area_size, + dma_free_coherent(mp->dev->dev.parent, rxq->rx_desc_area_size, rxq->rx_desc_area, rxq->rx_desc_dma); kfree(rxq->rx_skb); @@ -2007,9 +2010,9 @@ static int txq_init(struct mv643xx_eth_private *mp, int index) mp->tx_desc_sram_size); txq->tx_desc_dma = mp->tx_desc_sram_addr; } else { - txq->tx_desc_area = dma_alloc_coherent(NULL, size, - &txq->tx_desc_dma, - GFP_KERNEL); + txq->tx_desc_area = dma_alloc_coherent(mp->dev->dev.parent, + size, &txq->tx_desc_dma, + GFP_KERNEL); } if (txq->tx_desc_area == NULL) { @@ -2053,7 +2056,7 @@ static void txq_deinit(struct tx_queue *txq) txq->tx_desc_area_size <= mp->tx_desc_sram_size) iounmap(txq->tx_desc_area); else - dma_free_coherent(NULL, txq->tx_desc_area_size, + dma_free_coherent(mp->dev->dev.parent, txq->tx_desc_area_size, txq->tx_desc_area, txq->tx_desc_dma); } diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 0b6e8c896835..3b19e0ce290f 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -66,7 +66,6 @@ static const int multicast_filter_limit = 32; #define RX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ #define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ #define EarlyTxThld 0x3F /* 0x3F means NO early transmit */ -#define RxPacketMaxSize 0x3FE8 /* 16K - 1 - ETH_HLEN - VLAN - CRC... */ #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */ #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ @@ -2357,10 +2356,10 @@ static u16 rtl_rw_cpluscmd(void __iomem *ioaddr) return cmd; } -static void rtl_set_rx_max_size(void __iomem *ioaddr) +static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz) { /* Low hurts. Let's disable the filtering. */ - RTL_W16(RxMaxSize, 16383); + RTL_W16(RxMaxSize, rx_buf_sz); } static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version) @@ -2407,7 +2406,7 @@ static void rtl_hw_start_8169(struct net_device *dev) RTL_W8(EarlyTxThres, EarlyTxThld); - rtl_set_rx_max_size(ioaddr); + rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz); if ((tp->mac_version == RTL_GIGA_MAC_VER_01) || (tp->mac_version == RTL_GIGA_MAC_VER_02) || @@ -2668,7 +2667,7 @@ static void rtl_hw_start_8168(struct net_device *dev) RTL_W8(EarlyTxThres, EarlyTxThld); - rtl_set_rx_max_size(ioaddr); + rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz); tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1; @@ -2846,7 +2845,7 @@ static void rtl_hw_start_8101(struct net_device *dev) RTL_W8(EarlyTxThres, EarlyTxThld); - rtl_set_rx_max_size(ioaddr); + rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz); tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW; @@ -3554,54 +3553,64 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) int handled = 0; int status; + /* loop handling interrupts until we have no new ones or + * we hit a invalid/hotplug case. + */ status = RTL_R16(IntrStatus); + while (status && status != 0xffff) { + handled = 1; - /* hotplug/major error/no more work/shared irq */ - if ((status == 0xffff) || !status) - goto out; - - handled = 1; + /* Handle all of the error cases first. These will reset + * the chip, so just exit the loop. + */ + if (unlikely(!netif_running(dev))) { + rtl8169_asic_down(ioaddr); + break; + } - if (unlikely(!netif_running(dev))) { - rtl8169_asic_down(ioaddr); - goto out; - } + /* Work around for rx fifo overflow */ + if (unlikely(status & RxFIFOOver) && + (tp->mac_version == RTL_GIGA_MAC_VER_11)) { + netif_stop_queue(dev); + rtl8169_tx_timeout(dev); + break; + } - status &= tp->intr_mask; - RTL_W16(IntrStatus, - (status & RxFIFOOver) ? (status | RxOverflow) : status); + if (unlikely(status & SYSErr)) { + rtl8169_pcierr_interrupt(dev); + break; + } - if (!(status & tp->intr_event)) - goto out; + if (status & LinkChg) + rtl8169_check_link_status(dev, tp, ioaddr); - /* Work around for rx fifo overflow */ - if (unlikely(status & RxFIFOOver) && - (tp->mac_version == RTL_GIGA_MAC_VER_11)) { - netif_stop_queue(dev); - rtl8169_tx_timeout(dev); - goto out; - } + /* We need to see the lastest version of tp->intr_mask to + * avoid ignoring an MSI interrupt and having to wait for + * another event which may never come. + */ + smp_rmb(); + if (status & tp->intr_mask & tp->napi_event) { + RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event); + tp->intr_mask = ~tp->napi_event; + + if (likely(napi_schedule_prep(&tp->napi))) + __napi_schedule(&tp->napi); + else if (netif_msg_intr(tp)) { + printk(KERN_INFO "%s: interrupt %04x in poll\n", + dev->name, status); + } + } - if (unlikely(status & SYSErr)) { - rtl8169_pcierr_interrupt(dev); - goto out; + /* We only get a new MSI interrupt when all active irq + * sources on the chip have been acknowledged. So, ack + * everything we've seen and check if new sources have become + * active to avoid blocking all interrupts from the chip. + */ + RTL_W16(IntrStatus, + (status & RxFIFOOver) ? (status | RxOverflow) : status); + status = RTL_R16(IntrStatus); } - if (status & LinkChg) - rtl8169_check_link_status(dev, tp, ioaddr); - - if (status & tp->napi_event) { - RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event); - tp->intr_mask = ~tp->napi_event; - - if (likely(napi_schedule_prep(&tp->napi))) - __napi_schedule(&tp->napi); - else if (netif_msg_intr(tp)) { - printk(KERN_INFO "%s: interrupt %04x in poll\n", - dev->name, status); - } - } -out: return IRQ_RETVAL(handled); } @@ -3617,13 +3626,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete(napi); - tp->intr_mask = 0xffff; - /* - * 20040426: the barrier is not strictly required but the - * behavior of the irq handler could be less predictable - * without it. Btw, the lack of flush for the posted pci - * write is safe - FR + + /* We need for force the visibility of tp->intr_mask + * for other CPUs, as we can loose an MSI interrupt + * and potentially wait for a retransmit timeout if we don't. + * The posted write to IntrMask is safe, as it will + * eventually make it to the chip and we won't loose anything + * until it does. */ + tp->intr_mask = 0xffff; smp_wmb(); RTL_W16(IntrMask, tp->intr_event); } diff --git a/drivers/net/vxge/vxge-traffic.c b/drivers/net/vxge/vxge-traffic.c index 7be0ae10d69b..c2eeac4125f3 100644 --- a/drivers/net/vxge/vxge-traffic.c +++ b/drivers/net/vxge/vxge-traffic.c @@ -115,7 +115,7 @@ enum vxge_hw_status vxge_hw_vpath_intr_enable(struct __vxge_hw_vpath_handle *vp) VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_POISON| VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO2_POISON| VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_DMA_ERR| - VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO1_DMA_ERR), 0, 32), + VXGE_HW_KDFCCTL_ERRORS_REG_KDFCCTL_FIFO2_DMA_ERR), 0, 32), &vp_reg->kdfcctl_errors_mask); __vxge_hw_pio_mem_write32_upper(0, &vp_reg->vpath_ppif_int_mask); diff --git a/drivers/net/wimax/i2400m/rx.c b/drivers/net/wimax/i2400m/rx.c index 02419bfd64b5..f9fc38902322 100644 --- a/drivers/net/wimax/i2400m/rx.c +++ b/drivers/net/wimax/i2400m/rx.c @@ -819,10 +819,9 @@ void i2400m_roq_queue_update_ws(struct i2400m *i2400m, struct i2400m_roq *roq, roq_data = (struct i2400m_roq_data *) &skb->cb; i2400m_net_erx(i2400m, skb, roq_data->cs); } - else { + else __i2400m_roq_queue(i2400m, roq, skb, sn, nsn); - __i2400m_roq_update_ws(i2400m, roq, sn + 1); - } + __i2400m_roq_update_ws(i2400m, roq, sn + 1); i2400m_roq_log_add(i2400m, roq, I2400M_RO_TYPE_PACKET_WS, old_ws, len, sn, nsn, roq->ws); } diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index ca4151a9e222..17851321b7fd 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -505,27 +505,52 @@ int i2400mu_suspend(struct usb_interface *iface, pm_message_t pm_msg) #ifdef CONFIG_PM struct usb_device *usb_dev = i2400mu->usb_dev; #endif + unsigned is_autosuspend = 0; struct i2400m *i2400m = &i2400mu->i2400m; +#ifdef CONFIG_PM + if (usb_dev->auto_pm > 0) + is_autosuspend = 1; +#endif + d_fnstart(3, dev, "(iface %p pm_msg %u)\n", iface, pm_msg.event); if (i2400m->updown == 0) goto no_firmware; - d_printf(1, dev, "fw up, requesting standby\n"); + if (i2400m->state == I2400M_SS_DATA_PATH_CONNECTED && is_autosuspend) { + /* ugh -- the device is connected and this suspend + * request is an autosuspend one (not a system standby + * / hibernate). + * + * The only way the device can go to standby is if the + * link with the base station is in IDLE mode; that + * were the case, we'd be in status + * I2400M_SS_CONNECTED_IDLE. But we are not. + * + * If we *tell* him to go power save now, it'll reset + * as a precautionary measure, so if this is an + * autosuspend thing, say no and it'll come back + * later, when the link is IDLE + */ + result = -EBADF; + d_printf(1, dev, "fw up, link up, not-idle, autosuspend: " + "not entering powersave\n"); + goto error_not_now; + } + d_printf(1, dev, "fw up: entering powersave\n"); atomic_dec(&i2400mu->do_autopm); result = i2400m_cmd_enter_powersave(i2400m); atomic_inc(&i2400mu->do_autopm); -#ifdef CONFIG_PM - if (result < 0 && usb_dev->auto_pm == 0) { + if (result < 0 && !is_autosuspend) { /* System suspend, can't fail */ dev_err(dev, "failed to suspend, will reset on resume\n"); result = 0; } -#endif if (result < 0) goto error_enter_powersave; i2400mu_notification_release(i2400mu); - d_printf(1, dev, "fw up, got standby\n"); + d_printf(1, dev, "powersave requested\n"); error_enter_powersave: +error_not_now: no_firmware: d_fnend(3, dev, "(iface %p pm_msg %u) = %d\n", iface, pm_msg.event, result); diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 8a0823588c51..3d94e7dfea69 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -430,6 +430,7 @@ config RTL8187 ASUS P5B Deluxe Toshiba Satellite Pro series of laptops Asus Wireless Link + Linksys WUSB54GC-EU Thanks to Realtek for their support! diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index d73475739127..9eabf4d1f2e7 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -6467,6 +6467,7 @@ static int airo_get_encode(struct net_device *dev, { struct airo_info *local = dev->ml_priv; int index = (dwrq->flags & IW_ENCODE_INDEX) - 1; + int wep_key_len; u8 buf[16]; if (!local->wep_capable) @@ -6500,11 +6501,13 @@ static int airo_get_encode(struct net_device *dev, dwrq->flags |= index + 1; /* Copy the key to the user buffer */ - dwrq->length = get_wep_key(local, index, &buf[0], sizeof(buf)); - if (dwrq->length != -1) - memcpy(extra, buf, dwrq->length); - else + wep_key_len = get_wep_key(local, index, &buf[0], sizeof(buf)); + if (wep_key_len < 0) { dwrq->length = 0; + } else { + dwrq->length = wep_key_len; + memcpy(extra, buf, dwrq->length); + } return 0; } @@ -6617,7 +6620,7 @@ static int airo_get_encodeext(struct net_device *dev, struct airo_info *local = dev->ml_priv; struct iw_point *encoding = &wrqu->encoding; struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - int idx, max_key_len; + int idx, max_key_len, wep_key_len; u8 buf[16]; if (!local->wep_capable) @@ -6661,11 +6664,13 @@ static int airo_get_encodeext(struct net_device *dev, memset(extra, 0, 16); /* Copy the key to the user buffer */ - ext->key_len = get_wep_key(local, idx, &buf[0], sizeof(buf)); - if (ext->key_len != -1) - memcpy(extra, buf, ext->key_len); - else + wep_key_len = get_wep_key(local, idx, &buf[0], sizeof(buf)); + if (wep_key_len < 0) { ext->key_len = 0; + } else { + ext->key_len = wep_key_len; + memcpy(extra, buf, ext->key_len); + } return 0; } diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 744f4f4dd3d1..8d93ca4651b9 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1873,18 +1873,18 @@ static void at76_dwork_hw_scan(struct work_struct *work) if (ret != CMD_STATUS_COMPLETE) { queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan, SCAN_POLL_INTERVAL); - goto exit; + mutex_unlock(&priv->mtx); + return; } - ieee80211_scan_completed(priv->hw, false); - if (is_valid_ether_addr(priv->bssid)) at76_join(priv); - ieee80211_wake_queues(priv->hw); - -exit: mutex_unlock(&priv->mtx); + + ieee80211_scan_completed(priv->hw, false); + + ieee80211_wake_queues(priv->hw); } static int at76_hw_scan(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/ath5k/phy.c b/drivers/net/wireless/ath5k/phy.c index 9e2faae5ae94..b48b29dca3d2 100644 --- a/drivers/net/wireless/ath5k/phy.c +++ b/drivers/net/wireless/ath5k/phy.c @@ -1487,28 +1487,35 @@ ath5k_get_linear_pcdac_min(const u8 *stepL, const u8 *stepR, { s8 tmp; s16 min_pwrL, min_pwrR; - s16 pwr_i = pwrL[0]; - - do { - pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrL[0], pwrL[1], - stepL[0], stepL[1]); - - } while (tmp > 1); - - min_pwrL = pwr_i; - - pwr_i = pwrR[0]; - do { - pwr_i--; - tmp = (s8) ath5k_get_interpolated_value(pwr_i, - pwrR[0], pwrR[1], - stepR[0], stepR[1]); - - } while (tmp > 1); + s16 pwr_i; + + if (pwrL[0] == pwrL[1]) + min_pwrL = pwrL[0]; + else { + pwr_i = pwrL[0]; + do { + pwr_i--; + tmp = (s8) ath5k_get_interpolated_value(pwr_i, + pwrL[0], pwrL[1], + stepL[0], stepL[1]); + } while (tmp > 1); + + min_pwrL = pwr_i; + } - min_pwrR = pwr_i; + if (pwrR[0] == pwrR[1]) + min_pwrR = pwrR[0]; + else { + pwr_i = pwrR[0]; + do { + pwr_i--; + tmp = (s8) ath5k_get_interpolated_value(pwr_i, + pwrR[0], pwrR[1], + stepR[0], stepR[1]); + } while (tmp > 1); + + min_pwrR = pwr_i; + } /* Keep the right boundary so that it works for both curves */ return max(min_pwrL, min_pwrR); diff --git a/drivers/net/wireless/ath5k/reset.c b/drivers/net/wireless/ath5k/reset.c index 7a17d31b2fd9..5f72c111c2e8 100644 --- a/drivers/net/wireless/ath5k/reset.c +++ b/drivers/net/wireless/ath5k/reset.c @@ -26,7 +26,7 @@ \*****************************/ #include <linux/pci.h> /* To determine if a card is pci-e */ -#include <linux/bitops.h> /* For get_bitmask_order */ +#include <linux/log2.h> #include "ath5k.h" #include "reg.h" #include "base.h" @@ -69,10 +69,10 @@ static inline int ath5k_hw_write_ofdm_timings(struct ath5k_hw *ah, /* Get exponent * ALGO: coef_exp = 14 - highest set bit position */ - coef_exp = get_bitmask_order(coef_scaled); + coef_exp = ilog2(coef_scaled); /* Doesn't make sense if it's zero*/ - if (!coef_exp) + if (!coef_scaled || !coef_exp) return -EINVAL; /* Note: we've shifted coef_scaled by 24 */ @@ -359,7 +359,7 @@ int ath5k_hw_nic_wakeup(struct ath5k_hw *ah, int flags, bool initial) mode |= AR5K_PHY_MODE_FREQ_5GHZ; if (ah->ah_radio == AR5K_RF5413) - clock |= AR5K_PHY_PLL_40MHZ_5413; + clock = AR5K_PHY_PLL_40MHZ_5413; else clock |= AR5K_PHY_PLL_40MHZ; diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index e5ca2511a81a..9452461ce864 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -46,7 +46,7 @@ #include "iwl-6000-hw.h" /* Highest firmware API version supported */ -#define IWL5000_UCODE_API_MAX 1 +#define IWL5000_UCODE_API_MAX 2 #define IWL5150_UCODE_API_MAX 2 /* Lowest firmware API version supported */ diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 3bb28db4a40f..f46ba2475776 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -669,13 +669,6 @@ static int iwl_set_mode(struct iwl_priv *priv, int mode) if (!iwl_is_ready_rf(priv)) return -EAGAIN; - cancel_delayed_work(&priv->scan_check); - if (iwl_scan_cancel_timeout(priv, 100)) { - IWL_WARN(priv, "Aborted scan still in progress after 100ms\n"); - IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); - return -EAGAIN; - } - iwl_commit_rxon(priv); return 0; diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index e7c65c4f741b..6330b91e37ce 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -227,9 +227,6 @@ static void iwl_rx_scan_complete_notif(struct iwl_priv *priv, /* The HW is no longer scanning */ clear_bit(STATUS_SCAN_HW, &priv->status); - /* The scan completion notification came in, so kill that timer... */ - cancel_delayed_work(&priv->scan_check); - IWL_DEBUG_INFO(priv, "Scan pass on %sGHz took %dms\n", (priv->scan_bands & BIT(IEEE80211_BAND_2GHZ)) ? "2.4" : "5.2", @@ -712,6 +709,8 @@ static void iwl_bg_request_scan(struct work_struct *data) mutex_lock(&priv->mutex); + cancel_delayed_work(&priv->scan_check); + if (!iwl_is_ready(priv)) { IWL_WARN(priv, "request scan called when driver not ready.\n"); goto done; @@ -925,6 +924,8 @@ void iwl_bg_scan_completed(struct work_struct *work) IWL_DEBUG_SCAN(priv, "SCAN complete scan\n"); + cancel_delayed_work(&priv->scan_check); + ieee80211_scan_completed(priv->hw, false); if (test_bit(STATUS_EXIT_PENDING, &priv->status)) diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 4cce66133500..ff4d0e41d7c4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -782,13 +782,6 @@ static int iwl3945_set_mode(struct iwl_priv *priv, int mode) if (!iwl_is_ready_rf(priv)) return -EAGAIN; - cancel_delayed_work(&priv->scan_check); - if (iwl_scan_cancel_timeout(priv, 100)) { - IWL_WARN(priv, "Aborted scan still in progress after 100ms\n"); - IWL_DEBUG_MAC80211(priv, "leaving - scan abort failed.\n"); - return -EAGAIN; - } - iwl3945_commit_rxon(priv); return 0; @@ -3298,6 +3291,8 @@ static void iwl3945_bg_request_scan(struct work_struct *data) mutex_lock(&priv->mutex); + cancel_delayed_work(&priv->scan_check); + if (!iwl_is_ready(priv)) { IWL_WARN(priv, "request scan called when driver not ready.\n"); goto done; diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c index 07d378ef0b46..7b3ee8c2eaef 100644 --- a/drivers/net/wireless/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/rt2x00/rt2x00debug.c @@ -138,7 +138,7 @@ void rt2x00debug_update_crypto(struct rt2x00_dev *rt2x00dev, if (cipher == CIPHER_TKIP_NO_MIC) cipher = CIPHER_TKIP; - if (cipher == CIPHER_NONE || cipher > CIPHER_MAX) + if (cipher == CIPHER_NONE || cipher >= CIPHER_MAX) return; /* Remove CIPHER_NONE index */ diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index bac6cfba6abd..d51ba0a88c23 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -71,6 +71,8 @@ static struct usb_device_id rtl8187_table[] __devinitdata = { {USB_DEVICE(0x18E8, 0x6232), .driver_info = DEVICE_RTL8187}, /* AirLive */ {USB_DEVICE(0x1b75, 0x8187), .driver_info = DEVICE_RTL8187}, + /* Linksys */ + {USB_DEVICE(0x1737, 0x0073), .driver_info = DEVICE_RTL8187B}, {} }; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index f0e99d4c066b..242257b19441 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -78,16 +78,20 @@ void free_cpu_buffers(void) op_ring_buffer_write = NULL; } +#define RB_EVENT_HDR_SIZE 4 + int alloc_cpu_buffers(void) { int i; unsigned long buffer_size = oprofile_cpu_buffer_size; + unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + + RB_EVENT_HDR_SIZE); - op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); + op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); if (!op_ring_buffer_read) goto fail; - op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); + op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); if (!op_ring_buffer_write) goto fail; diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 73348c4047e9..4a9cc92d4d18 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, +static int iosapic_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { struct vector_info *vi = iosapic_get_vector(irq); @@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq, dest_cpu = cpu_check_affinity(irq, dest); if (dest_cpu < 0) - return; + return -1; cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu)); vi->txn_addr = txn_affinity_addr(irq, dest_cpu); @@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq, iosapic_set_irt_data(vi, &dummy_d0, &d1); iosapic_wr_irt_entry(vi, d0, d1); spin_unlock_irqrestore(&iosapic_lock, flags); + + return 0; } #endif diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c index e6a7e847ee80..ea31a452b153 100644 --- a/drivers/parport/parport_gsc.c +++ b/drivers/parport/parport_gsc.c @@ -352,8 +352,8 @@ static int __devinit parport_init_chip(struct parisc_device *dev) unsigned long port; if (!dev->irq) { - printk(KERN_WARNING "IRQ not found for parallel device at 0x%lx\n", - dev->hpa.start); + printk(KERN_WARNING "IRQ not found for parallel device at 0x%llx\n", + (unsigned long long)dev->hpa.start); return -ENODEV; } diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 0ebca450ed29..dffa5d4fb298 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -614,7 +614,10 @@ parport_register_device(struct parport *port, const char *name, * pardevice fields. -arca */ port->ops->init_state(tmp, tmp->state); - parport_device_proc_register(tmp); + if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { + port->proc_device = tmp; + parport_device_proc_register(tmp); + } return tmp; out_free_all: @@ -646,10 +649,14 @@ void parport_unregister_device(struct pardevice *dev) } #endif - parport_device_proc_unregister(dev); - port = dev->port->physport; + if (port->proc_device == dev) { + port->proc_device = NULL; + clear_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags); + parport_device_proc_unregister(dev); + } + if (port->cad == dev) { printk(KERN_DEBUG "%s: %s forgot to release port\n", port->name, dev->name); diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 4fc168b70095..e68d5f20ffb3 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -129,7 +129,6 @@ struct acpiphp_func { struct acpiphp_bridge *bridge; /* Ejectable PCI-to-PCI bridge */ struct list_head sibling; - struct pci_dev *pci_dev; struct notifier_block nb; acpi_handle handle; diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a33794d9e0dc..3a6064bce561 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -32,9 +32,6 @@ /* * Lifetime rules for pci_dev: - * - The one in acpiphp_func has its refcount elevated by pci_get_slot() - * when the driver is loaded or when an insertion event occurs. It loses - * a refcount when its ejected or the driver unloads. * - The one in acpiphp_bridge has its refcount elevated by pci_get_slot() * when the bridge is scanned and it loses a refcount when the bridge * is removed. @@ -130,6 +127,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) unsigned long long adr, sun; int device, function, retval; struct pci_bus *pbus = bridge->pci_bus; + struct pci_dev *pdev; if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle)) return AE_OK; @@ -213,10 +211,10 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) newfunc->slot = slot; list_add_tail(&newfunc->sibling, &slot->funcs); - /* associate corresponding pci_dev */ - newfunc->pci_dev = pci_get_slot(pbus, PCI_DEVFN(device, function)); - if (newfunc->pci_dev) { + pdev = pci_get_slot(pbus, PCI_DEVFN(device, function)); + if (pdev) { slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON); + pci_dev_put(pdev); } if (is_dock_device(handle)) { @@ -617,7 +615,6 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) if (ACPI_FAILURE(status)) err("failed to remove notify handler\n"); } - pci_dev_put(func->pci_dev); list_del(list); kfree(func); } @@ -1101,22 +1098,24 @@ static int __ref enable_device(struct acpiphp_slot *slot) pci_enable_bridges(bus); pci_bus_add_devices(bus); - /* associate pci_dev to our representation */ list_for_each (l, &slot->funcs) { func = list_entry(l, struct acpiphp_func, sibling); - func->pci_dev = pci_get_slot(bus, PCI_DEVFN(slot->device, - func->function)); - if (!func->pci_dev) + dev = pci_get_slot(bus, PCI_DEVFN(slot->device, + func->function)); + if (!dev) continue; - if (func->pci_dev->hdr_type != PCI_HEADER_TYPE_BRIDGE && - func->pci_dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) + if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE && + dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) { + pci_dev_put(dev); continue; + } status = find_p2p_bridge(func->handle, (u32)1, bus, NULL); if (ACPI_FAILURE(status)) warn("find_p2p_bridge failed (error code = 0x%x)\n", status); + pci_dev_put(dev); } slot->flags |= SLOT_ENABLED; @@ -1142,17 +1141,14 @@ static void disable_bridges(struct pci_bus *bus) */ static int disable_device(struct acpiphp_slot *slot) { - int retval = 0; struct acpiphp_func *func; - struct list_head *l; + struct pci_dev *pdev; /* is this slot already disabled? */ if (!(slot->flags & SLOT_ENABLED)) goto err_exit; - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { if (func->bridge) { /* cleanup p2p bridges under this P2P bridge */ cleanup_p2p_bridge(func->bridge->handle, @@ -1160,35 +1156,28 @@ static int disable_device(struct acpiphp_slot *slot) func->bridge = NULL; } - if (func->pci_dev) { - pci_stop_bus_device(func->pci_dev); - if (func->pci_dev->subordinate) { - disable_bridges(func->pci_dev->subordinate); - pci_disable_device(func->pci_dev); + pdev = pci_get_slot(slot->bridge->pci_bus, + PCI_DEVFN(slot->device, func->function)); + if (pdev) { + pci_stop_bus_device(pdev); + if (pdev->subordinate) { + disable_bridges(pdev->subordinate); + pci_disable_device(pdev); } + pci_remove_bus_device(pdev); + pci_dev_put(pdev); } } - list_for_each (l, &slot->funcs) { - func = list_entry(l, struct acpiphp_func, sibling); - + list_for_each_entry(func, &slot->funcs, sibling) { acpiphp_unconfigure_ioapics(func->handle); acpiphp_bus_trim(func->handle); - /* try to remove anyway. - * acpiphp_bus_add might have been failed */ - - if (!func->pci_dev) - continue; - - pci_remove_bus_device(func->pci_dev); - pci_dev_put(func->pci_dev); - func->pci_dev = NULL; } slot->flags &= (~SLOT_ENABLED); - err_exit: - return retval; +err_exit: + return 0; } diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index dd18f857dfb0..42e4260c3b12 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -153,45 +153,47 @@ int ibmphp_init_devno(struct slot **cur_slot) return -1; } for (loop = 0; loop < len; loop++) { - if ((*cur_slot)->number == rtable->slots[loop].slot) { - if ((*cur_slot)->bus == rtable->slots[loop].bus) { + if ((*cur_slot)->number == rtable->slots[loop].slot && + (*cur_slot)->bus == rtable->slots[loop].bus) { + struct io_apic_irq_attr irq_attr; + (*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn); for (i = 0; i < 4; i++) (*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus, - (int) (*cur_slot)->device, i); - - debug("(*cur_slot)->irq[0] = %x\n", - (*cur_slot)->irq[0]); - debug("(*cur_slot)->irq[1] = %x\n", - (*cur_slot)->irq[1]); - debug("(*cur_slot)->irq[2] = %x\n", - (*cur_slot)->irq[2]); - debug("(*cur_slot)->irq[3] = %x\n", - (*cur_slot)->irq[3]); - - debug("rtable->exlusive_irqs = %x\n", + (int) (*cur_slot)->device, i, + &irq_attr); + + debug("(*cur_slot)->irq[0] = %x\n", + (*cur_slot)->irq[0]); + debug("(*cur_slot)->irq[1] = %x\n", + (*cur_slot)->irq[1]); + debug("(*cur_slot)->irq[2] = %x\n", + (*cur_slot)->irq[2]); + debug("(*cur_slot)->irq[3] = %x\n", + (*cur_slot)->irq[3]); + + debug("rtable->exlusive_irqs = %x\n", rtable->exclusive_irqs); - debug("rtable->slots[loop].irq[0].bitmap = %x\n", + debug("rtable->slots[loop].irq[0].bitmap = %x\n", rtable->slots[loop].irq[0].bitmap); - debug("rtable->slots[loop].irq[1].bitmap = %x\n", + debug("rtable->slots[loop].irq[1].bitmap = %x\n", rtable->slots[loop].irq[1].bitmap); - debug("rtable->slots[loop].irq[2].bitmap = %x\n", + debug("rtable->slots[loop].irq[2].bitmap = %x\n", rtable->slots[loop].irq[2].bitmap); - debug("rtable->slots[loop].irq[3].bitmap = %x\n", + debug("rtable->slots[loop].irq[3].bitmap = %x\n", rtable->slots[loop].irq[3].bitmap); - debug("rtable->slots[loop].irq[0].link = %x\n", + debug("rtable->slots[loop].irq[0].link = %x\n", rtable->slots[loop].irq[0].link); - debug("rtable->slots[loop].irq[1].link = %x\n", + debug("rtable->slots[loop].irq[1].link = %x\n", rtable->slots[loop].irq[1].link); - debug("rtable->slots[loop].irq[2].link = %x\n", + debug("rtable->slots[loop].irq[2].link = %x\n", rtable->slots[loop].irq[2].link); - debug("rtable->slots[loop].irq[3].link = %x\n", + debug("rtable->slots[loop].irq[3].link = %x\n", rtable->slots[loop].irq[3].link); - debug("end of init_devno\n"); - kfree(rtable); - return 0; - } + debug("end of init_devno\n"); + kfree(rtable); + return 0; } } diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index 6808d8333ecc..737a1c44b07a 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -98,6 +98,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) int max_irq; int pos; int irq; + int node; pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ); if (!pos) @@ -125,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) cfg->msg.address_lo = 0xffffffff; cfg->msg.address_hi = 0xffffffff; - irq = create_irq(); + node = dev_to_node(&dev->dev); + irq = create_irq_nr(0, node); if (irq <= 0) { kfree(cfg); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a563fbe559d0..cd389162735f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1972,15 +1972,6 @@ static int __init init_dmars(void) } } -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) { - ret = enable_intr_remapping(0); - if (ret) - printk(KERN_ERR - "IOMMU: enable interrupt remapping failed\n"); - } -#endif - /* * For each rmrr * for each dev attached to rmrr diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea724a6f..3a0cb0bb0593 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static int disable_intremap; +static __init int setup_nointremap(char *str) +{ + disable_intremap = 1; + return 0; +} +early_param("nointremap", setup_nointremap); + struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -23,15 +31,12 @@ struct irq_2_iommu { }; #ifdef CONFIG_GENERIC_HARDIRQS -static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) { struct irq_2_iommu *iommu; - int node; - - node = cpu_to_node(cpu); iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); return iommu; } @@ -48,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq) return desc->irq_2_iommu; } -static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; struct irq_2_iommu *irq_iommu; @@ -56,7 +61,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) /* * alloc irq desc if not allocated already. */ - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); return NULL; @@ -65,14 +70,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) irq_iommu = desc->irq_2_iommu; if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + desc->irq_2_iommu = get_one_free_irq_2_iommu(node); return desc->irq_2_iommu; } static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { - return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); + return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id)); } #else /* !CONFIG_SPARSE_IRQ */ @@ -423,20 +428,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) readl, (sts & DMA_GSTS_IRTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flags); - if (mode == 0) { - spin_lock_irqsave(&iommu->register_lock, flags); - - /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; - iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_CFIS), sts); - - spin_unlock_irqrestore(&iommu->register_lock, flags); - } - /* * global invalidation of interrupt entry cache before enabling * interrupt-remapping. @@ -516,6 +507,23 @@ end: spin_unlock_irqrestore(&iommu->register_lock, flags); } +int __init intr_remapping_supported(void) +{ + struct dmar_drhd_unit *drhd; + + if (disable_intremap) + return 0; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + return 0; + } + + return 1; +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 34bf0fdf5047..1a91bf9687af 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -557,7 +557,8 @@ static int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state) } else { error = -ENODEV; /* Fall back to PCI_D0 if native PM is not supported */ - pci_update_current_state(dev, PCI_D0); + if (!dev->pm_cap) + dev->current_state = PCI_D0; } return error; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e3c3e081b834..f1ae2475ffff 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -745,6 +745,8 @@ int pci_setup_device(struct pci_dev *dev) /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); + /* device class may be changed after fixup */ + class = dev->class >> 8; switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index eeafc6c0160d..bfc1a8892a32 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -269,16 +269,16 @@ static struct key_entry asus_keymap[] = { {KE_KEY, 0x34, KEY_SWITCHVIDEOMODE}, {KE_KEY, 0x40, KEY_PREVIOUSSONG}, {KE_KEY, 0x41, KEY_NEXTSONG}, - {KE_KEY, 0x43, KEY_STOP}, + {KE_KEY, 0x43, KEY_STOPCD}, {KE_KEY, 0x45, KEY_PLAYPAUSE}, {KE_KEY, 0x50, KEY_EMAIL}, {KE_KEY, 0x51, KEY_WWW}, - {KE_KEY, 0x5C, BTN_EXTRA}, /* Performance */ + {KE_KEY, 0x5C, KEY_SCREENLOCK}, /* Screenlock */ {KE_KEY, 0x5D, KEY_WLAN}, {KE_KEY, 0x61, KEY_SWITCHVIDEOMODE}, {KE_KEY, 0x6B, BTN_TOUCH}, /* Lock Mouse */ {KE_KEY, 0x82, KEY_CAMERA}, - {KE_KEY, 0x8A, KEY_TV}, + {KE_KEY, 0x8A, KEY_PROG1}, {KE_KEY, 0x95, KEY_MEDIA}, {KE_KEY, 0x99, KEY_PHONE}, {KE_END, 0}, diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 6f54fd1757cd..353a898c3693 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -158,6 +158,7 @@ enum { KE_KEY, KE_END }; static struct key_entry eeepc_keymap[] = { /* Sleep already handled via generic ACPI code */ {KE_KEY, 0x10, KEY_WLAN }, + {KE_KEY, 0x11, KEY_WLAN }, {KE_KEY, 0x12, KEY_PROG1 }, {KE_KEY, 0x13, KEY_MUTE }, {KE_KEY, 0x14, KEY_VOLUMEDOWN }, @@ -166,6 +167,8 @@ static struct key_entry eeepc_keymap[] = { {KE_KEY, 0x1b, KEY_ZOOM }, {KE_KEY, 0x1c, KEY_PROG2 }, {KE_KEY, 0x1d, KEY_PROG3 }, + {KE_KEY, NOTIFY_BRN_MIN, KEY_BRIGHTNESSDOWN }, + {KE_KEY, NOTIFY_BRN_MIN + 2, KEY_BRIGHTNESSUP }, {KE_KEY, 0x30, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x31, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x32, KEY_SWITCHVIDEOMODE }, @@ -381,11 +384,13 @@ static ssize_t show_sys_acpi(int cm, char *buf) EEEPC_CREATE_DEVICE_ATTR(camera, CM_ASL_CAMERA); EEEPC_CREATE_DEVICE_ATTR(cardr, CM_ASL_CARDREADER); EEEPC_CREATE_DEVICE_ATTR(disp, CM_ASL_DISPLAYSWITCH); +EEEPC_CREATE_DEVICE_ATTR(cpufv, CM_ASL_CPUFV); static struct attribute *platform_attributes[] = { &dev_attr_camera.attr, &dev_attr_cardr.attr, &dev_attr_disp.attr, + &dev_attr_cpufv.attr, NULL }; @@ -512,15 +517,21 @@ static int eeepc_hotk_check(void) return 0; } -static void notify_brn(void) +static int notify_brn(void) { + /* returns the *previous* brightness, or -1 */ struct backlight_device *bd = eeepc_backlight_device; - if (bd) + if (bd) { + int old = bd->props.brightness; bd->props.brightness = read_brightness(bd); + return old; + } + return -1; } static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) { + enum rfkill_state state; struct pci_dev *dev; struct pci_bus *bus = pci_find_bus(0, 1); @@ -532,7 +543,9 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) return; } - if (get_acpi(CM_ASL_WLAN) == 1) { + eeepc_wlan_rfkill_state(ehotk->eeepc_wlan_rfkill, &state); + + if (state == RFKILL_STATE_UNBLOCKED) { dev = pci_get_slot(bus, 0); if (dev) { /* Device already present */ @@ -552,23 +565,41 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) pci_dev_put(dev); } } + + rfkill_force_state(ehotk->eeepc_wlan_rfkill, state); } static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) { static struct key_entry *key; u16 count; + int brn = -ENODEV; if (!ehotk) return; if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) - notify_brn(); + brn = notify_brn(); count = ehotk->event_count[event % 128]++; acpi_bus_generate_proc_event(ehotk->device, event, count); acpi_bus_generate_netlink_event(ehotk->device->pnp.device_class, dev_name(&ehotk->device->dev), event, count); if (ehotk->inputdev) { + if (brn != -ENODEV) { + /* brightness-change events need special + * handling for conversion to key events + */ + if (brn < 0) + brn = event; + else + brn += NOTIFY_BRN_MIN; + if (event < brn) + event = NOTIFY_BRN_MIN; /* brightness down */ + else if (event > brn) + event = NOTIFY_BRN_MIN + 2; /* ... up */ + else + event = NOTIFY_BRN_MIN + 1; /* ... unchanged */ + } key = eepc_get_entry_by_scancode(event); if (key) { switch (key->type) { @@ -649,6 +680,9 @@ static int eeepc_hotk_add(struct acpi_device *device) if (ACPI_FAILURE(status)) printk(EEEPC_ERR "Error installing notify handler\n"); + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); + if (get_acpi(CM_ASL_WLAN) != -1) { ehotk->eeepc_wlan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN); @@ -704,9 +738,6 @@ static int eeepc_hotk_add(struct acpi_device *device) goto bluetooth_fail; } - eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); - eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); - return 0; bluetooth_fail: @@ -717,6 +748,8 @@ static int eeepc_hotk_add(struct acpi_device *device) wlan_fail: if (ehotk->eeepc_wlan_rfkill) rfkill_free(ehotk->eeepc_wlan_rfkill); + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7"); ehotk_fail: kfree(ehotk); ehotk = NULL; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9a3a682c6981..9496494f340e 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -110,11 +110,9 @@ static int pnpacpi_disable_resources(struct pnp_dev *dev) /* acpi_unregister_gsi(pnp_irq(dev, 0)); */ ret = 0; - if (acpi_bus_power_manageable(handle)) { - ret = acpi_bus_set_power(handle, ACPI_STATE_D3); - if (ret) - return ret; - } + if (acpi_bus_power_manageable(handle)) + acpi_bus_set_power(handle, ACPI_STATE_D3); + /* continue even if acpi_bus_set_power() fails */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_DIS", NULL, NULL))) ret = -ENODEV; return ret; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index adf17856bacc..7f207f335bec 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, } flags = irq_flags(triggering, polarity, shareable); - irq = acpi_register_gsi(gsi, triggering, polarity); + irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (irq >= 0) pcibios_penalize_isa_irq(irq, 1); else diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c index 72b15495183c..c6628f5a0af7 100644 --- a/drivers/regulator/da903x.c +++ b/drivers/regulator/da903x.c @@ -497,7 +497,7 @@ static struct platform_driver da903x_regulator_driver = { .owner = THIS_MODULE, }, .probe = da903x_regulator_probe, - .remove = da903x_regulator_remove, + .remove = __devexit_p(da903x_regulator_remove), }; static int __init da903x_regulator_init(void) diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index 826153552157..aaf1f75fa293 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -102,7 +102,7 @@ static const struct rtc_class_ops pl030_ops = { .set_alarm = pl030_set_alarm, }; -static int pl030_probe(struct amba_device *dev, void *id) +static int pl030_probe(struct amba_device *dev, struct amba_id *id) { struct pl030_rtc *rtc; int ret; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 333eec689d2f..451fc13784d1 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -127,7 +127,7 @@ static int pl031_remove(struct amba_device *adev) return 0; } -static int pl031_probe(struct amba_device *adev, void *id) +static int pl031_probe(struct amba_device *adev, struct amba_id *id) { int ret; struct pl031_local *ldata; diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index 8b7983aba8f7..36c21b19e5d7 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1978,7 +1978,8 @@ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) { struct scsi_cmnd *cmd = tw_dev->srb[request_id]; - scsi_dma_unmap(cmd); + if (cmd->SCp.phase == TW_PHASE_SGLIST) + scsi_dma_unmap(cmd); } /* End twa_unmap_scsi_data() */ /* scsi_host_template initializer */ diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index c03f1d2c9e2e..faa0fcfed71e 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -6,7 +6,7 @@ Arnaldo Carvalho de Melo <acme@conectiva.com.br> Brad Strand <linux@3ware.com> - Copyright (C) 1999-2007 3ware Inc. + Copyright (C) 1999-2009 3ware Inc. Kernel compatiblity By: Andre Hedrick <andre@suse.com> Non-Copyright (C) 2000 Andre Hedrick <andre@suse.com> @@ -1294,7 +1294,8 @@ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) { dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n"); - scsi_dma_unmap(cmd); + if (cmd->SCp.phase == TW_PHASE_SGLIST) + scsi_dma_unmap(cmd); } /* End tw_unmap_scsi_data() */ /* This function will reset a device extension */ diff --git a/drivers/scsi/3w-xxxx.h b/drivers/scsi/3w-xxxx.h index 8e71e5e122b3..a5a2ba2561d9 100644 --- a/drivers/scsi/3w-xxxx.h +++ b/drivers/scsi/3w-xxxx.h @@ -6,7 +6,7 @@ Arnaldo Carvalho de Melo <acme@conectiva.com.br> Brad Strand <linux@3ware.com> - Copyright (C) 1999-2007 3ware Inc. + Copyright (C) 1999-2009 3ware Inc. Kernel compatiblity By: Andre Hedrick <andre@suse.com> Non-Copyright (C) 2000 Andre Hedrick <andre@suse.com> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8ed2990c826e..fb2740789b68 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -628,6 +628,17 @@ config FCOE ---help--- Fibre Channel over Ethernet module +config FCOE_FNIC + tristate "Cisco FNIC Driver" + depends on PCI && X86 + select LIBFC + help + This is support for the Cisco PCI-Express FCoE HBA. + + To compile this driver as a module, choose M here and read + <file:Documentation/scsi/scsi.txt>. + The module will be called fnic. + config SCSI_DMX3191D tristate "DMX3191D SCSI support" depends on PCI && SCSI diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index e7c861ac417d..a5049cfb40ed 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_SCSI_DH) += device_handler/ obj-$(CONFIG_LIBFC) += libfc/ obj-$(CONFIG_LIBFCOE) += fcoe/ obj-$(CONFIG_FCOE) += fcoe/ +obj-$(CONFIG_FCOE_FNIC) += fnic/ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o libiscsi_tcp.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o obj-$(CONFIG_SCSI_A4000T) += 53c700.o a4000t.o diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile new file mode 100644 index 000000000000..37c3440bc17c --- /dev/null +++ b/drivers/scsi/fnic/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_FCOE_FNIC) += fnic.o + +fnic-y := \ + fnic_attrs.o \ + fnic_isr.o \ + fnic_main.o \ + fnic_res.o \ + fnic_fcs.o \ + fnic_scsi.o \ + vnic_cq.o \ + vnic_dev.o \ + vnic_intr.o \ + vnic_rq.o \ + vnic_wq_copy.o \ + vnic_wq.o diff --git a/drivers/scsi/fnic/cq_desc.h b/drivers/scsi/fnic/cq_desc.h new file mode 100644 index 000000000000..d1225cf6320e --- /dev/null +++ b/drivers/scsi/fnic/cq_desc.h @@ -0,0 +1,78 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_DESC_H_ +#define _CQ_DESC_H_ + +/* + * Completion queue descriptor types + */ +enum cq_desc_types { + CQ_DESC_TYPE_WQ_ENET = 0, + CQ_DESC_TYPE_DESC_COPY = 1, + CQ_DESC_TYPE_WQ_EXCH = 2, + CQ_DESC_TYPE_RQ_ENET = 3, + CQ_DESC_TYPE_RQ_FCP = 4, +}; + +/* Completion queue descriptor: 16B + * + * All completion queues have this basic layout. The + * type_specfic area is unique for each completion + * queue type. + */ +struct cq_desc { + __le16 completed_index; + __le16 q_number; + u8 type_specfic[11]; + u8 type_color; +}; + +#define CQ_DESC_TYPE_BITS 4 +#define CQ_DESC_TYPE_MASK ((1 << CQ_DESC_TYPE_BITS) - 1) +#define CQ_DESC_COLOR_MASK 1 +#define CQ_DESC_COLOR_SHIFT 7 +#define CQ_DESC_Q_NUM_BITS 10 +#define CQ_DESC_Q_NUM_MASK ((1 << CQ_DESC_Q_NUM_BITS) - 1) +#define CQ_DESC_COMP_NDX_BITS 12 +#define CQ_DESC_COMP_NDX_MASK ((1 << CQ_DESC_COMP_NDX_BITS) - 1) + +static inline void cq_desc_dec(const struct cq_desc *desc_arg, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index) +{ + const struct cq_desc *desc = desc_arg; + const u8 type_color = desc->type_color; + + *color = (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + + rmb(); + + *type = type_color & CQ_DESC_TYPE_MASK; + *q_number = le16_to_cpu(desc->q_number) & CQ_DESC_Q_NUM_MASK; + *completed_index = le16_to_cpu(desc->completed_index) & + CQ_DESC_COMP_NDX_MASK; +} + +#endif /* _CQ_DESC_H_ */ diff --git a/drivers/scsi/fnic/cq_enet_desc.h b/drivers/scsi/fnic/cq_enet_desc.h new file mode 100644 index 000000000000..a9fa26f82ddd --- /dev/null +++ b/drivers/scsi/fnic/cq_enet_desc.h @@ -0,0 +1,167 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_ENET_DESC_H_ +#define _CQ_ENET_DESC_H_ + +#include "cq_desc.h" + +/* Ethernet completion queue descriptor: 16B */ +struct cq_enet_wq_desc { + __le16 completed_index; + __le16 q_number; + u8 reserved[11]; + u8 type_color; +}; + +static inline void cq_enet_wq_desc_dec(struct cq_enet_wq_desc *desc, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index) +{ + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); +} + +/* Completion queue descriptor: Ethernet receive queue, 16B */ +struct cq_enet_rq_desc { + __le16 completed_index_flags; + __le16 q_number_rss_type_flags; + __le32 rss_hash; + __le16 bytes_written_flags; + __le16 vlan; + __le16 checksum_fcoe; + u8 flags; + u8 type_color; +}; + +#define CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT (0x1 << 12) +#define CQ_ENET_RQ_DESC_FLAGS_FCOE (0x1 << 13) +#define CQ_ENET_RQ_DESC_FLAGS_EOP (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_SOP (0x1 << 15) + +#define CQ_ENET_RQ_DESC_RSS_TYPE_BITS 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_MASK \ + ((1 << CQ_ENET_RQ_DESC_RSS_TYPE_BITS) - 1) +#define CQ_ENET_RQ_DESC_RSS_TYPE_NONE 0 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv4 1 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4 2 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6 3 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6 4 +#define CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX 5 +#define CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX 6 + +#define CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC (0x1 << 14) + +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS 14 +#define CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK \ + ((1 << CQ_ENET_RQ_DESC_BYTES_WRITTEN_BITS) - 1) +#define CQ_ENET_RQ_DESC_FLAGS_TRUNCATED (0x1 << 14) +#define CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED (0x1 << 15) + +#define CQ_ENET_RQ_DESC_FCOE_SOF_BITS 4 +#define CQ_ENET_RQ_DESC_FCOE_SOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_SOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_BITS 8 +#define CQ_ENET_RQ_DESC_FCOE_EOF_MASK \ + ((1 << CQ_ENET_RQ_DESC_FCOE_EOF_BITS) - 1) +#define CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT 8 + +#define CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK (0x1 << 0) +#define CQ_ENET_RQ_DESC_FLAGS_UDP (0x1 << 1) +#define CQ_ENET_RQ_DESC_FCOE_ENC_ERROR (0x1 << 1) +#define CQ_ENET_RQ_DESC_FLAGS_TCP (0x1 << 2) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK (0x1 << 3) +#define CQ_ENET_RQ_DESC_FLAGS_IPV6 (0x1 << 4) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4 (0x1 << 5) +#define CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT (0x1 << 6) +#define CQ_ENET_RQ_DESC_FLAGS_FCS_OK (0x1 << 7) + +static inline void cq_enet_rq_desc_dec(struct cq_enet_rq_desc *desc, + u8 *type, u8 *color, u16 *q_number, u16 *completed_index, + u8 *ingress_port, u8 *fcoe, u8 *eop, u8 *sop, u8 *rss_type, + u8 *csum_not_calc, u32 *rss_hash, u16 *bytes_written, u8 *packet_error, + u8 *vlan_stripped, u16 *vlan, u16 *checksum, u8 *fcoe_sof, + u8 *fcoe_fc_crc_ok, u8 *fcoe_enc_error, u8 *fcoe_eof, + u8 *tcp_udp_csum_ok, u8 *udp, u8 *tcp, u8 *ipv4_csum_ok, + u8 *ipv6, u8 *ipv4, u8 *ipv4_fragment, u8 *fcs_ok) +{ + u16 completed_index_flags = le16_to_cpu(desc->completed_index_flags); + u16 q_number_rss_type_flags = + le16_to_cpu(desc->q_number_rss_type_flags); + u16 bytes_written_flags = le16_to_cpu(desc->bytes_written_flags); + + cq_desc_dec((struct cq_desc *)desc, type, + color, q_number, completed_index); + + *ingress_port = (completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_INGRESS_PORT) ? 1 : 0; + *fcoe = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_FCOE) ? + 1 : 0; + *eop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_EOP) ? + 1 : 0; + *sop = (completed_index_flags & CQ_ENET_RQ_DESC_FLAGS_SOP) ? + 1 : 0; + + *rss_type = (u8)((q_number_rss_type_flags >> CQ_DESC_Q_NUM_BITS) & + CQ_ENET_RQ_DESC_RSS_TYPE_MASK); + *csum_not_calc = (q_number_rss_type_flags & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ? 1 : 0; + + *rss_hash = le32_to_cpu(desc->rss_hash); + + *bytes_written = bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_error = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ? 1 : 0; + *vlan_stripped = (bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) ? 1 : 0; + + *vlan = le16_to_cpu(desc->vlan); + + if (*fcoe) { + *fcoe_sof = (u8)(le16_to_cpu(desc->checksum_fcoe) & + CQ_ENET_RQ_DESC_FCOE_SOF_MASK); + *fcoe_fc_crc_ok = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_FC_CRC_OK) ? 1 : 0; + *fcoe_enc_error = (desc->flags & + CQ_ENET_RQ_DESC_FCOE_ENC_ERROR) ? 1 : 0; + *fcoe_eof = (u8)((desc->checksum_fcoe >> + CQ_ENET_RQ_DESC_FCOE_EOF_SHIFT) & + CQ_ENET_RQ_DESC_FCOE_EOF_MASK); + *checksum = 0; + } else { + *fcoe_sof = 0; + *fcoe_fc_crc_ok = 0; + *fcoe_enc_error = 0; + *fcoe_eof = 0; + *checksum = le16_to_cpu(desc->checksum_fcoe); + } + + *tcp_udp_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ? 1 : 0; + *udp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_UDP) ? 1 : 0; + *tcp = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_TCP) ? 1 : 0; + *ipv4_csum_ok = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ? 1 : 0; + *ipv6 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV6) ? 1 : 0; + *ipv4 = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4) ? 1 : 0; + *ipv4_fragment = + (desc->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT) ? 1 : 0; + *fcs_ok = (desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) ? 1 : 0; +} + +#endif /* _CQ_ENET_DESC_H_ */ diff --git a/drivers/scsi/fnic/cq_exch_desc.h b/drivers/scsi/fnic/cq_exch_desc.h new file mode 100644 index 000000000000..501660cfe228 --- /dev/null +++ b/drivers/scsi/fnic/cq_exch_desc.h @@ -0,0 +1,182 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _CQ_EXCH_DESC_H_ +#define _CQ_EXCH_DESC_H_ + +#include "cq_desc.h" + +/* Exchange completion queue descriptor: 16B */ +struct cq_exch_wq_desc { + u16 completed_index; + u16 q_number; + u16 exchange_id; + u8 tmpl; + u8 reserved0; + u32 reserved1; + u8 exch_status; + u8 reserved2[2]; + u8 type_color; +}; + +#define CQ_EXCH_WQ_STATUS_BITS 2 +#define CQ_EXCH_WQ_STATUS_MASK ((1 << CQ_EXCH_WQ_STATUS_BITS) - 1) + +enum cq_exch_status_types { + CQ_EXCH_WQ_STATUS_TYPE_COMPLETE = 0, + CQ_EXCH_WQ_STATUS_TYPE_ABORT = 1, + CQ_EXCH_WQ_STATUS_TYPE_SGL_EOF = 2, + CQ_EXCH_WQ_STATUS_TYPE_TMPL_ERR = 3, +}; + +static inline void cq_exch_wq_desc_dec(struct cq_exch_wq_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *completed_index, + u8 *exch_status) +{ + cq_desc_dec((struct cq_desc *)desc_ptr, type, + color, q_number, completed_index); + *exch_status = desc_ptr->exch_status & CQ_EXCH_WQ_STATUS_MASK; +} + +struct cq_fcp_rq_desc { + u16 completed_index_eop_sop_prt; + u16 q_number; + u16 exchange_id; + u16 tmpl; + u16 bytes_written; + u16 vlan; + u8 sof; + u8 eof; + u8 fcs_fer_fck; + u8 type_color; +}; + +#define CQ_FCP_RQ_DESC_FLAGS_SOP (1 << 15) +#define CQ_FCP_RQ_DESC_FLAGS_EOP (1 << 14) +#define CQ_FCP_RQ_DESC_FLAGS_PRT (1 << 12) +#define CQ_FCP_RQ_DESC_TMPL_MASK 0x1f +#define CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK 0x3fff +#define CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT 14 +#define CQ_FCP_RQ_DESC_PACKET_ERR_MASK (1 << CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT) +#define CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT 15 +#define CQ_FCP_RQ_DESC_VS_STRIPPED_MASK (1 << CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT) +#define CQ_FCP_RQ_DESC_FC_CRC_OK_MASK 0x1 +#define CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT 1 +#define CQ_FCP_RQ_DESC_FCOE_ERR_MASK (1 << CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT) +#define CQ_FCP_RQ_DESC_FCS_OK_SHIFT 7 +#define CQ_FCP_RQ_DESC_FCS_OK_MASK (1 << CQ_FCP_RQ_DESC_FCS_OK_SHIFT) + +static inline void cq_fcp_rq_desc_dec(struct cq_fcp_rq_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *completed_index, + u8 *eop, + u8 *sop, + u8 *fck, + u16 *exchange_id, + u16 *tmpl, + u32 *bytes_written, + u8 *sof, + u8 *eof, + u8 *ingress_port, + u8 *packet_err, + u8 *fcoe_err, + u8 *fcs_ok, + u8 *vlan_stripped, + u16 *vlan) +{ + cq_desc_dec((struct cq_desc *)desc_ptr, type, + color, q_number, completed_index); + *eop = (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_EOP) ? 1 : 0; + *sop = (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_SOP) ? 1 : 0; + *ingress_port = + (desc_ptr->completed_index_eop_sop_prt & + CQ_FCP_RQ_DESC_FLAGS_PRT) ? 1 : 0; + *exchange_id = desc_ptr->exchange_id; + *tmpl = desc_ptr->tmpl & CQ_FCP_RQ_DESC_TMPL_MASK; + *bytes_written = + desc_ptr->bytes_written & CQ_FCP_RQ_DESC_BYTES_WRITTEN_MASK; + *packet_err = + (desc_ptr->bytes_written & CQ_FCP_RQ_DESC_PACKET_ERR_MASK) >> + CQ_FCP_RQ_DESC_PACKET_ERR_SHIFT; + *vlan_stripped = + (desc_ptr->bytes_written & CQ_FCP_RQ_DESC_VS_STRIPPED_MASK) >> + CQ_FCP_RQ_DESC_VS_STRIPPED_SHIFT; + *vlan = desc_ptr->vlan; + *sof = desc_ptr->sof; + *fck = desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FC_CRC_OK_MASK; + *fcoe_err = (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCOE_ERR_MASK) >> + CQ_FCP_RQ_DESC_FCOE_ERR_SHIFT; + *eof = desc_ptr->eof; + *fcs_ok = + (desc_ptr->fcs_fer_fck & CQ_FCP_RQ_DESC_FCS_OK_MASK) >> + CQ_FCP_RQ_DESC_FCS_OK_SHIFT; +} + +struct cq_sgl_desc { + u16 exchange_id; + u16 q_number; + u32 active_burst_offset; + u32 tot_data_bytes; + u16 tmpl; + u8 sgl_err; + u8 type_color; +}; + +enum cq_sgl_err_types { + CQ_SGL_ERR_NO_ERROR = 0, + CQ_SGL_ERR_OVERFLOW, /* data ran beyond end of SGL */ + CQ_SGL_ERR_SGL_LCL_ADDR_ERR, /* sgl access to local vnic addr illegal*/ + CQ_SGL_ERR_ADDR_RSP_ERR, /* sgl address error */ + CQ_SGL_ERR_DATA_RSP_ERR, /* sgl data rsp error */ + CQ_SGL_ERR_CNT_ZERO_ERR, /* SGL count is 0 */ + CQ_SGL_ERR_CNT_MAX_ERR, /* SGL count is larger than supported */ + CQ_SGL_ERR_ORDER_ERR, /* frames recv on both ports, order err */ + CQ_SGL_ERR_DATA_LCL_ADDR_ERR,/* sgl data buf to local vnic addr ill */ + CQ_SGL_ERR_HOST_CQ_ERR, /* host cq entry to local vnic addr ill */ +}; + +#define CQ_SGL_SGL_ERR_MASK 0x1f +#define CQ_SGL_TMPL_MASK 0x1f + +static inline void cq_sgl_desc_dec(struct cq_sgl_desc *desc_ptr, + u8 *type, + u8 *color, + u16 *q_number, + u16 *exchange_id, + u32 *active_burst_offset, + u32 *tot_data_bytes, + u16 *tmpl, + u8 *sgl_err) +{ + /* Cheat a little by assuming exchange_id is the same as completed + index */ + cq_desc_dec((struct cq_desc *)desc_ptr, type, color, q_number, + exchange_id); + *active_burst_offset = desc_ptr->active_burst_offset; + *tot_data_bytes = desc_ptr->tot_data_bytes; + *tmpl = desc_ptr->tmpl & CQ_SGL_TMPL_MASK; + *sgl_err = desc_ptr->sgl_err & CQ_SGL_SGL_ERR_MASK; +} + +#endif /* _CQ_EXCH_DESC_H_ */ diff --git a/drivers/scsi/fnic/fcpio.h b/drivers/scsi/fnic/fcpio.h new file mode 100644 index 000000000000..12d770d885c5 --- /dev/null +++ b/drivers/scsi/fnic/fcpio.h @@ -0,0 +1,780 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FCPIO_H_ +#define _FCPIO_H_ + +#include <linux/if_ether.h> + +/* + * This header file includes all of the data structures used for + * communication by the host driver to the fcp firmware. + */ + +/* + * Exchange and sequence id space allocated to the host driver + */ +#define FCPIO_HOST_EXCH_RANGE_START 0x1000 +#define FCPIO_HOST_EXCH_RANGE_END 0x1fff +#define FCPIO_HOST_SEQ_ID_RANGE_START 0x80 +#define FCPIO_HOST_SEQ_ID_RANGE_END 0xff + +/* + * Command entry type + */ +enum fcpio_type { + /* + * Initiator request types + */ + FCPIO_ICMND_16 = 0x1, + FCPIO_ICMND_32, + FCPIO_ICMND_CMPL, + FCPIO_ITMF, + FCPIO_ITMF_CMPL, + + /* + * Target request types + */ + FCPIO_TCMND_16 = 0x11, + FCPIO_TCMND_32, + FCPIO_TDATA, + FCPIO_TXRDY, + FCPIO_TRSP, + FCPIO_TDRSP_CMPL, + FCPIO_TTMF, + FCPIO_TTMF_ACK, + FCPIO_TABORT, + FCPIO_TABORT_CMPL, + + /* + * Misc request types + */ + FCPIO_ACK = 0x20, + FCPIO_RESET, + FCPIO_RESET_CMPL, + FCPIO_FLOGI_REG, + FCPIO_FLOGI_REG_CMPL, + FCPIO_ECHO, + FCPIO_ECHO_CMPL, + FCPIO_LUNMAP_CHNG, + FCPIO_LUNMAP_REQ, + FCPIO_LUNMAP_REQ_CMPL, + FCPIO_FLOGI_FIP_REG, + FCPIO_FLOGI_FIP_REG_CMPL, +}; + +/* + * Header status codes from the firmware + */ +enum fcpio_status { + FCPIO_SUCCESS = 0, /* request was successful */ + + /* + * If a request to the firmware is rejected, the original request + * header will be returned with the status set to one of the following: + */ + FCPIO_INVALID_HEADER, /* header contains invalid data */ + FCPIO_OUT_OF_RESOURCE, /* out of resources to complete request */ + FCPIO_INVALID_PARAM, /* some parameter in request is invalid */ + FCPIO_REQ_NOT_SUPPORTED, /* request type is not supported */ + FCPIO_IO_NOT_FOUND, /* requested I/O was not found */ + + /* + * Once a request is processed, the firmware will usually return + * a cmpl message type. In cases where errors occurred, + * the header status field would be filled in with one of the following: + */ + FCPIO_ABORTED = 0x41, /* request was aborted */ + FCPIO_TIMEOUT, /* request was timed out */ + FCPIO_SGL_INVALID, /* request was aborted due to sgl error */ + FCPIO_MSS_INVALID, /* request was aborted due to mss error */ + FCPIO_DATA_CNT_MISMATCH, /* recv/sent more/less data than exp. */ + FCPIO_FW_ERR, /* request was terminated due to fw error */ + FCPIO_ITMF_REJECTED, /* itmf req was rejected by remote node */ + FCPIO_ITMF_FAILED, /* itmf req was failed by remote node */ + FCPIO_ITMF_INCORRECT_LUN, /* itmf req targeted incorrect LUN */ + FCPIO_CMND_REJECTED, /* request was invalid and rejected */ + FCPIO_NO_PATH_AVAIL, /* no paths to the lun was available */ + FCPIO_PATH_FAILED, /* i/o sent to current path failed */ + FCPIO_LUNMAP_CHNG_PEND, /* i/o rejected due to lunmap change */ +}; + +/* + * The header command tag. All host requests will use the "tag" field + * to mark commands with a unique tag. When the firmware responds to + * a host request, it will copy the tag field into the response. + * + * The only firmware requests that will use the rx_id/ox_id fields instead + * of the tag field will be the target command and target task management + * requests. These two requests do not have corresponding host requests + * since they come directly from the FC initiator on the network. + */ +struct fcpio_tag { + union { + u32 req_id; + struct { + u16 rx_id; + u16 ox_id; + } ex_id; + } u; +}; + +static inline void +fcpio_tag_id_enc(struct fcpio_tag *tag, u32 id) +{ + tag->u.req_id = id; +} + +static inline void +fcpio_tag_id_dec(struct fcpio_tag *tag, u32 *id) +{ + *id = tag->u.req_id; +} + +static inline void +fcpio_tag_exid_enc(struct fcpio_tag *tag, u16 ox_id, u16 rx_id) +{ + tag->u.ex_id.rx_id = rx_id; + tag->u.ex_id.ox_id = ox_id; +} + +static inline void +fcpio_tag_exid_dec(struct fcpio_tag *tag, u16 *ox_id, u16 *rx_id) +{ + *rx_id = tag->u.ex_id.rx_id; + *ox_id = tag->u.ex_id.ox_id; +} + +/* + * The header for an fcpio request, whether from the firmware or from the + * host driver + */ +struct fcpio_header { + u8 type; /* enum fcpio_type */ + u8 status; /* header status entry */ + u16 _resvd; /* reserved */ + struct fcpio_tag tag; /* header tag */ +}; + +static inline void +fcpio_header_enc(struct fcpio_header *hdr, + u8 type, u8 status, + struct fcpio_tag tag) +{ + hdr->type = type; + hdr->status = status; + hdr->_resvd = 0; + hdr->tag = tag; +} + +static inline void +fcpio_header_dec(struct fcpio_header *hdr, + u8 *type, u8 *status, + struct fcpio_tag *tag) +{ + *type = hdr->type; + *status = hdr->status; + *tag = hdr->tag; +} + +#define CDB_16 16 +#define CDB_32 32 +#define LUN_ADDRESS 8 + +/* + * fcpio_icmnd_16: host -> firmware request + * + * used for sending out an initiator SCSI 16-byte command + */ +struct fcpio_icmnd_16 { + u32 lunmap_id; /* index into lunmap table */ + u8 special_req_flags; /* special exchange request flags */ + u8 _resvd0[3]; /* reserved */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 sense_len; /* sense buffer length */ + u64 sgl_addr; /* scatter-gather list addr */ + u64 sense_addr; /* sense buffer address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd1; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_16]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd2; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u16 mss; /* FC vNIC only: max burst */ + u16 _resvd3; /* reserved */ + u32 r_a_tov; /* FC vNIC only: Res. Alloc Timeout */ + u32 e_d_tov; /* FC vNIC only: Err Detect Timeout */ +}; + +/* + * Special request flags + */ +#define FCPIO_ICMND_SRFLAG_RETRY 0x01 /* Enable Retry handling on exchange */ + +/* + * Priority/Task Attribute settings + */ +#define FCPIO_ICMND_PTA_SIMPLE 0 /* simple task attribute */ +#define FCPIO_ICMND_PTA_HEADQ 1 /* head of queue task attribute */ +#define FCPIO_ICMND_PTA_ORDERED 2 /* ordered task attribute */ +#define FCPIO_ICMND_PTA_ACA 4 /* auto contingent allegiance */ +#define FCPIO_ICMND_PRI_SHIFT 3 /* priority field starts in bit 3 */ + +/* + * Command flags + */ +#define FCPIO_ICMND_RDDATA 0x02 /* read data */ +#define FCPIO_ICMND_WRDATA 0x01 /* write data */ + +/* + * fcpio_icmnd_32: host -> firmware request + * + * used for sending out an initiator SCSI 32-byte command + */ +struct fcpio_icmnd_32 { + u32 lunmap_id; /* index into lunmap table */ + u8 special_req_flags; /* special exchange request flags */ + u8 _resvd0[3]; /* reserved */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 sense_len; /* sense buffer length */ + u64 sgl_addr; /* scatter-gather list addr */ + u64 sense_addr; /* sense buffer address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd1; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_32]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd2; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u16 mss; /* FC vNIC only: max burst */ + u16 _resvd3; /* reserved */ + u32 r_a_tov; /* FC vNIC only: Res. Alloc Timeout */ + u32 e_d_tov; /* FC vNIC only: Error Detect Timeout */ +}; + +/* + * fcpio_itmf: host -> firmware request + * + * used for requesting the firmware to abort a request and/or send out + * a task management function + * + * The t_tag field is only needed when the request type is ABT_TASK. + */ +struct fcpio_itmf { + u32 lunmap_id; /* index into lunmap table */ + u32 tm_req; /* SCSI Task Management request */ + u32 t_tag; /* header tag of fcpio to be aborted */ + u32 _resvd; /* _reserved */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 _resvd1; /* reserved */ + u8 d_id[3]; /* FC vNIC only: Target D_ID */ + u32 r_a_tov; /* FC vNIC only: R_A_TOV in msec */ + u32 e_d_tov; /* FC vNIC only: E_D_TOV in msec */ +}; + +/* + * Task Management request + */ +enum fcpio_itmf_tm_req_type { + FCPIO_ITMF_ABT_TASK_TERM = 0x01, /* abort task and terminate */ + FCPIO_ITMF_ABT_TASK, /* abort task and issue abts */ + FCPIO_ITMF_ABT_TASK_SET, /* abort task set */ + FCPIO_ITMF_CLR_TASK_SET, /* clear task set */ + FCPIO_ITMF_LUN_RESET, /* logical unit reset task mgmt */ + FCPIO_ITMF_CLR_ACA, /* Clear ACA condition */ +}; + +/* + * fcpio_tdata: host -> firmware request + * + * used for requesting the firmware to send out a read data transfer for a + * target command + */ +struct fcpio_tdata { + u16 rx_id; /* FC rx_id of target command */ + u16 flags; /* command flags */ + u32 rel_offset; /* data sequence relative offset */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 data_len; /* length of data expected to send */ + u64 sgl_addr; /* scatter-gather list address */ +}; + +/* + * Command flags + */ +#define FCPIO_TDATA_SCSI_RSP 0x01 /* send a scsi resp. after last frame */ + +/* + * fcpio_txrdy: host -> firmware request + * + * used for requesting the firmware to send out a write data transfer for a + * target command + */ +struct fcpio_txrdy { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 rel_offset; /* data sequence relative offset */ + u32 sgl_cnt; /* scatter-gather list count */ + u32 data_len; /* length of data expected to send */ + u64 sgl_addr; /* scatter-gather list address */ +}; + +/* + * fcpio_trsp: host -> firmware request + * + * used for requesting the firmware to send out a response for a target + * command + */ +struct fcpio_trsp { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 sense_len; /* sense data buffer length */ + u64 sense_addr; /* sense data buffer address */ + u16 _resvd1; /* reserved */ + u8 flags; /* response request flags */ + u8 scsi_status; /* SCSI status */ + u32 residual; /* SCSI data residual value of I/O */ +}; + +/* + * resposnse request flags + */ +#define FCPIO_TRSP_RESID_UNDER 0x08 /* residual is valid and is underflow */ +#define FCPIO_TRSP_RESID_OVER 0x04 /* residual is valid and is overflow */ + +/* + * fcpio_ttmf_ack: host -> firmware response + * + * used by the host to indicate to the firmware it has received and processed + * the target tmf request + */ +struct fcpio_ttmf_ack { + u16 rx_id; /* FC rx_id of target command */ + u16 _resvd0; /* reserved */ + u32 tmf_status; /* SCSI task management status */ +}; + +/* + * fcpio_tabort: host -> firmware request + * + * used by the host to request the firmware to abort a target request that was + * received by the firmware + */ +struct fcpio_tabort { + u16 rx_id; /* rx_id of the target request */ +}; + +/* + * fcpio_reset: host -> firmware request + * + * used by the host to signal a reset of the driver to the firmware + * and to request firmware to clean up all outstanding I/O + */ +struct fcpio_reset { + u32 _resvd; +}; + +enum fcpio_flogi_reg_format_type { + FCPIO_FLOGI_REG_DEF_DEST = 0, /* Use the oui | s_id mac format */ + FCPIO_FLOGI_REG_GW_DEST, /* Use the fixed gateway mac */ +}; + +/* + * fcpio_flogi_reg: host -> firmware request + * + * fc vnic only + * used by the host to notify the firmware of the lif's s_id + * and destination mac address format + */ +struct fcpio_flogi_reg { + u8 format; + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 gateway_mac[ETH_ALEN]; /* Destination gateway mac */ + u16 _resvd; + u32 r_a_tov; /* R_A_TOV in msec */ + u32 e_d_tov; /* E_D_TOV in msec */ +}; + +/* + * fcpio_echo: host -> firmware request + * + * sends a heartbeat echo request to the firmware + */ +struct fcpio_echo { + u32 _resvd; +}; + +/* + * fcpio_lunmap_req: host -> firmware request + * + * scsi vnic only + * sends a request to retrieve the lunmap table for scsi vnics + */ +struct fcpio_lunmap_req { + u64 addr; /* address of the buffer */ + u32 len; /* len of the buffer */ +}; + +/* + * fcpio_flogi_fip_reg: host -> firmware request + * + * fc vnic only + * used by the host to notify the firmware of the lif's s_id + * and destination mac address format + */ +struct fcpio_flogi_fip_reg { + u8 _resvd0; + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 fcf_mac[ETH_ALEN]; /* FCF Target destination mac */ + u16 _resvd1; + u32 r_a_tov; /* R_A_TOV in msec */ + u32 e_d_tov; /* E_D_TOV in msec */ + u8 ha_mac[ETH_ALEN]; /* Host adapter source mac */ + u16 _resvd2; +}; + +/* + * Basic structure for all fcpio structures that are sent from the host to the + * firmware. They are 128 bytes per structure. + */ +#define FCPIO_HOST_REQ_LEN 128 /* expected length of host requests */ + +struct fcpio_host_req { + struct fcpio_header hdr; + + union { + /* + * Defines space needed for request + */ + u8 buf[FCPIO_HOST_REQ_LEN - sizeof(struct fcpio_header)]; + + /* + * Initiator host requests + */ + struct fcpio_icmnd_16 icmnd_16; + struct fcpio_icmnd_32 icmnd_32; + struct fcpio_itmf itmf; + + /* + * Target host requests + */ + struct fcpio_tdata tdata; + struct fcpio_txrdy txrdy; + struct fcpio_trsp trsp; + struct fcpio_ttmf_ack ttmf_ack; + struct fcpio_tabort tabort; + + /* + * Misc requests + */ + struct fcpio_reset reset; + struct fcpio_flogi_reg flogi_reg; + struct fcpio_echo echo; + struct fcpio_lunmap_req lunmap_req; + struct fcpio_flogi_fip_reg flogi_fip_reg; + } u; +}; + +/* + * fcpio_icmnd_cmpl: firmware -> host response + * + * used for sending the host a response to an initiator command + */ +struct fcpio_icmnd_cmpl { + u8 _resvd0[6]; /* reserved */ + u8 flags; /* response flags */ + u8 scsi_status; /* SCSI status */ + u32 residual; /* SCSI data residual length */ + u32 sense_len; /* SCSI sense length */ +}; + +/* + * response flags + */ +#define FCPIO_ICMND_CMPL_RESID_UNDER 0x08 /* resid under and valid */ +#define FCPIO_ICMND_CMPL_RESID_OVER 0x04 /* resid over and valid */ + +/* + * fcpio_itmf_cmpl: firmware -> host response + * + * used for sending the host a response for a itmf request + */ +struct fcpio_itmf_cmpl { + u32 _resvd; /* reserved */ +}; + +/* + * fcpio_tcmnd_16: firmware -> host request + * + * used by the firmware to notify the host of an incoming target SCSI 16-Byte + * request + */ +struct fcpio_tcmnd_16 { + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd2; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_16]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 _resvd1; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ +}; + +/* + * Priority/Task Attribute settings + */ +#define FCPIO_TCMND_PTA_SIMPLE 0 /* simple task attribute */ +#define FCPIO_TCMND_PTA_HEADQ 1 /* head of queue task attribute */ +#define FCPIO_TCMND_PTA_ORDERED 2 /* ordered task attribute */ +#define FCPIO_TCMND_PTA_ACA 4 /* auto contingent allegiance */ +#define FCPIO_TCMND_PRI_SHIFT 3 /* priority field starts in bit 3 */ + +/* + * Command flags + */ +#define FCPIO_TCMND_RDDATA 0x02 /* read data */ +#define FCPIO_TCMND_WRDATA 0x01 /* write data */ + +/* + * fcpio_tcmnd_32: firmware -> host request + * + * used by the firmware to notify the host of an incoming target SCSI 32-Byte + * request + */ +struct fcpio_tcmnd_32 { + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 pri_ta; /* SCSI Priority and Task attribute */ + u8 _resvd2; /* reserved: should be 0 */ + u8 flags; /* command flags */ + u8 scsi_cdb[CDB_32]; /* SCSI Cmnd Descriptor Block */ + u32 data_len; /* length of data expected */ + u8 _resvd0; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ +}; + +/* + * fcpio_tdrsp_cmpl: firmware -> host response + * + * used by the firmware to notify the host of a response to a host target + * command + */ +struct fcpio_tdrsp_cmpl { + u16 rx_id; /* rx_id of the target request */ + u16 _resvd0; /* reserved */ +}; + +/* + * fcpio_ttmf: firmware -> host request + * + * used by the firmware to notify the host of an incoming task management + * function request + */ +struct fcpio_ttmf { + u8 _resvd0; /* reserved */ + u8 s_id[3]; /* FC vNIC only: Source S_ID */ + u8 lun[LUN_ADDRESS]; /* FC vNIC only: LUN address */ + u8 crn; /* SCSI Command Reference No. */ + u8 _resvd2[3]; /* reserved */ + u32 tmf_type; /* task management request type */ +}; + +/* + * Task Management request + */ +#define FCPIO_TTMF_CLR_ACA 0x40 /* Clear ACA condition */ +#define FCPIO_TTMF_LUN_RESET 0x10 /* logical unit reset task mgmt */ +#define FCPIO_TTMF_CLR_TASK_SET 0x04 /* clear task set */ +#define FCPIO_TTMF_ABT_TASK_SET 0x02 /* abort task set */ +#define FCPIO_TTMF_ABT_TASK 0x01 /* abort task */ + +/* + * fcpio_tabort_cmpl: firmware -> host response + * + * used by the firmware to respond to a host's tabort request + */ +struct fcpio_tabort_cmpl { + u16 rx_id; /* rx_id of the target request */ + u16 _resvd0; /* reserved */ +}; + +/* + * fcpio_ack: firmware -> host response + * + * used by firmware to notify the host of the last work request received + */ +struct fcpio_ack { + u16 request_out; /* last host entry received */ + u16 _resvd; +}; + +/* + * fcpio_reset_cmpl: firmware -> host response + * + * use by firmware to respond to the host's reset request + */ +struct fcpio_reset_cmpl { + u16 vnic_id; +}; + +/* + * fcpio_flogi_reg_cmpl: firmware -> host response + * + * fc vnic only + * response to the fcpio_flogi_reg request + */ +struct fcpio_flogi_reg_cmpl { + u32 _resvd; +}; + +/* + * fcpio_echo_cmpl: firmware -> host response + * + * response to the fcpio_echo request + */ +struct fcpio_echo_cmpl { + u32 _resvd; +}; + +/* + * fcpio_lunmap_chng: firmware -> host notification + * + * scsi vnic only + * notifies the host that the lunmap tables have changed + */ +struct fcpio_lunmap_chng { + u32 _resvd; +}; + +/* + * fcpio_lunmap_req_cmpl: firmware -> host response + * + * scsi vnic only + * response for lunmap table request from the host + */ +struct fcpio_lunmap_req_cmpl { + u32 _resvd; +}; + +/* + * Basic structure for all fcpio structures that are sent from the firmware to + * the host. They are 64 bytes per structure. + */ +#define FCPIO_FW_REQ_LEN 64 /* expected length of fw requests */ +struct fcpio_fw_req { + struct fcpio_header hdr; + + union { + /* + * Defines space needed for request + */ + u8 buf[FCPIO_FW_REQ_LEN - sizeof(struct fcpio_header)]; + + /* + * Initiator firmware responses + */ + struct fcpio_icmnd_cmpl icmnd_cmpl; + struct fcpio_itmf_cmpl itmf_cmpl; + + /* + * Target firmware new requests + */ + struct fcpio_tcmnd_16 tcmnd_16; + struct fcpio_tcmnd_32 tcmnd_32; + + /* + * Target firmware responses + */ + struct fcpio_tdrsp_cmpl tdrsp_cmpl; + struct fcpio_ttmf ttmf; + struct fcpio_tabort_cmpl tabort_cmpl; + + /* + * Firmware response to work received + */ + struct fcpio_ack ack; + + /* + * Misc requests + */ + struct fcpio_reset_cmpl reset_cmpl; + struct fcpio_flogi_reg_cmpl flogi_reg_cmpl; + struct fcpio_echo_cmpl echo_cmpl; + struct fcpio_lunmap_chng lunmap_chng; + struct fcpio_lunmap_req_cmpl lunmap_req_cmpl; + } u; +}; + +/* + * Access routines to encode and decode the color bit, which is the most + * significant bit of the MSB of the structure + */ +static inline void fcpio_color_enc(struct fcpio_fw_req *fw_req, u8 color) +{ + u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1; + + if (color) + *c |= 0x80; + else + *c &= ~0x80; +} + +static inline void fcpio_color_dec(struct fcpio_fw_req *fw_req, u8 *color) +{ + u8 *c = ((u8 *) fw_req) + sizeof(struct fcpio_fw_req) - 1; + + *color = *c >> 7; + + /* + * Make sure color bit is read from desc *before* other fields + * are read from desc. Hardware guarantees color bit is last + * bit (byte) written. Adding the rmb() prevents the compiler + * and/or CPU from reordering the reads which would potentially + * result in reading stale values. + */ + + rmb(); + +} + +/* + * Lunmap table entry for scsi vnics + */ +#define FCPIO_LUNMAP_TABLE_SIZE 256 +#define FCPIO_FLAGS_LUNMAP_VALID 0x80 +#define FCPIO_FLAGS_BOOT 0x01 +struct fcpio_lunmap_entry { + u8 bus; + u8 target; + u8 lun; + u8 path_cnt; + u16 flags; + u16 update_cnt; +}; + +struct fcpio_lunmap_tbl { + u32 update_cnt; + struct fcpio_lunmap_entry lunmaps[FCPIO_LUNMAP_TABLE_SIZE]; +}; + +#endif /* _FCPIO_H_ */ diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h new file mode 100644 index 000000000000..e4c0a3d7d87b --- /dev/null +++ b/drivers/scsi/fnic/fnic.h @@ -0,0 +1,265 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_H_ +#define _FNIC_H_ + +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <scsi/libfc.h> +#include "fnic_io.h" +#include "fnic_res.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_wq_copy.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_scsi.h" + +#define DRV_NAME "fnic" +#define DRV_DESCRIPTION "Cisco FCoE HBA Driver" +#define DRV_VERSION "1.0.0.1121" +#define PFX DRV_NAME ": " +#define DFX DRV_NAME "%d: " + +#define DESC_CLEAN_LOW_WATERMARK 8 +#define FNIC_MAX_IO_REQ 2048 /* scsi_cmnd tag map entries */ +#define FNIC_IO_LOCKS 64 /* IO locks: power of 2 */ +#define FNIC_DFLT_QUEUE_DEPTH 32 +#define FNIC_STATS_RATE_LIMIT 4 /* limit rate at which stats are pulled up */ + +/* + * Tag bits used for special requests. + */ +#define BIT(nr) (1UL << (nr)) +#define FNIC_TAG_ABORT BIT(30) /* tag bit indicating abort */ +#define FNIC_TAG_DEV_RST BIT(29) /* indicates device reset */ +#define FNIC_TAG_MASK (BIT(24) - 1) /* mask for lookup */ +#define FNIC_NO_TAG -1 + +/* + * Usage of the scsi_cmnd scratchpad. + * These fields are locked by the hashed io_req_lock. + */ +#define CMD_SP(Cmnd) ((Cmnd)->SCp.ptr) +#define CMD_STATE(Cmnd) ((Cmnd)->SCp.phase) +#define CMD_ABTS_STATUS(Cmnd) ((Cmnd)->SCp.Message) +#define CMD_LR_STATUS(Cmnd) ((Cmnd)->SCp.have_data_in) +#define CMD_TAG(Cmnd) ((Cmnd)->SCp.sent_command) + +#define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */ + +#define FNIC_LUN_RESET_TIMEOUT 10000 /* mSec */ +#define FNIC_HOST_RESET_TIMEOUT 10000 /* mSec */ +#define FNIC_RMDEVICE_TIMEOUT 1000 /* mSec */ +#define FNIC_HOST_RESET_SETTLE_TIME 30 /* Sec */ + +#define FNIC_MAX_FCP_TARGET 256 + +extern unsigned int fnic_log_level; + +#define FNIC_MAIN_LOGGING 0x01 +#define FNIC_FCS_LOGGING 0x02 +#define FNIC_SCSI_LOGGING 0x04 +#define FNIC_ISR_LOGGING 0x08 + +#define FNIC_CHECK_LOGGING(LEVEL, CMD) \ +do { \ + if (unlikely(fnic_log_level & LEVEL)) \ + do { \ + CMD; \ + } while (0); \ +} while (0) + +#define FNIC_MAIN_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_MAIN_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_FCS_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_FCS_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_SCSI_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_SCSI_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +#define FNIC_ISR_DBG(kern_level, host, fmt, args...) \ + FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING, \ + shost_printk(kern_level, host, fmt, ##args);) + +extern const char *fnic_state_str[]; + +enum fnic_intx_intr_index { + FNIC_INTX_WQ_RQ_COPYWQ, + FNIC_INTX_ERR, + FNIC_INTX_NOTIFY, + FNIC_INTX_INTR_MAX, +}; + +enum fnic_msix_intr_index { + FNIC_MSIX_RQ, + FNIC_MSIX_WQ, + FNIC_MSIX_WQ_COPY, + FNIC_MSIX_ERR_NOTIFY, + FNIC_MSIX_INTR_MAX, +}; + +struct fnic_msix_entry { + int requested; + char devname[IFNAMSIZ]; + irqreturn_t (*isr)(int, void *); + void *devid; +}; + +enum fnic_state { + FNIC_IN_FC_MODE = 0, + FNIC_IN_FC_TRANS_ETH_MODE, + FNIC_IN_ETH_MODE, + FNIC_IN_ETH_TRANS_FC_MODE, +}; + +#define FNIC_WQ_COPY_MAX 1 +#define FNIC_WQ_MAX 1 +#define FNIC_RQ_MAX 1 +#define FNIC_CQ_MAX (FNIC_WQ_COPY_MAX + FNIC_WQ_MAX + FNIC_RQ_MAX) + +struct mempool; + +/* Per-instance private data structure */ +struct fnic { + struct fc_lport *lport; + struct vnic_dev_bar bar0; + + struct msix_entry msix_entry[FNIC_MSIX_INTR_MAX]; + struct fnic_msix_entry msix[FNIC_MSIX_INTR_MAX]; + + struct vnic_stats *stats; + unsigned long stats_time; /* time of stats update */ + struct vnic_nic_cfg *nic_cfg; + char name[IFNAMSIZ]; + struct timer_list notify_timer; /* used for MSI interrupts */ + + unsigned int err_intr_offset; + unsigned int link_intr_offset; + + unsigned int wq_count; + unsigned int cq_count; + + u32 fcoui_mode:1; /* use fcoui address*/ + u32 vlan_hw_insert:1; /* let hw insert the tag */ + u32 in_remove:1; /* fnic device in removal */ + u32 stop_rx_link_events:1; /* stop proc. rx frames, link events */ + + struct completion *remove_wait; /* device remove thread blocks */ + + struct fc_frame *flogi; + struct fc_frame *flogi_resp; + u16 flogi_oxid; + unsigned long s_id; + enum fnic_state state; + spinlock_t fnic_lock; + + u16 vlan_id; /* VLAN tag including priority */ + u8 mac_addr[ETH_ALEN]; + u8 dest_addr[ETH_ALEN]; + u8 data_src_addr[ETH_ALEN]; + u64 fcp_input_bytes; /* internal statistic */ + u64 fcp_output_bytes; /* internal statistic */ + u32 link_down_cnt; + int link_status; + + struct list_head list; + struct pci_dev *pdev; + struct vnic_fc_config config; + struct vnic_dev *vdev; + unsigned int raw_wq_count; + unsigned int wq_copy_count; + unsigned int rq_count; + int fw_ack_index[FNIC_WQ_COPY_MAX]; + unsigned short fw_ack_recd[FNIC_WQ_COPY_MAX]; + unsigned short wq_copy_desc_low[FNIC_WQ_COPY_MAX]; + unsigned int intr_count; + u32 __iomem *legacy_pba; + struct fnic_host_tag *tags; + mempool_t *io_req_pool; + mempool_t *io_sgl_pool[FNIC_SGL_NUM_CACHES]; + spinlock_t io_req_lock[FNIC_IO_LOCKS]; /* locks for scsi cmnds */ + + struct work_struct link_work; + struct work_struct frame_work; + struct sk_buff_head frame_queue; + + /* copy work queue cache line section */ + ____cacheline_aligned struct vnic_wq_copy wq_copy[FNIC_WQ_COPY_MAX]; + /* completion queue cache line section */ + ____cacheline_aligned struct vnic_cq cq[FNIC_CQ_MAX]; + + spinlock_t wq_copy_lock[FNIC_WQ_COPY_MAX]; + + /* work queue cache line section */ + ____cacheline_aligned struct vnic_wq wq[FNIC_WQ_MAX]; + spinlock_t wq_lock[FNIC_WQ_MAX]; + + /* receive queue cache line section */ + ____cacheline_aligned struct vnic_rq rq[FNIC_RQ_MAX]; + + /* interrupt resource cache line section */ + ____cacheline_aligned struct vnic_intr intr[FNIC_MSIX_INTR_MAX]; +}; + +extern struct workqueue_struct *fnic_event_queue; +extern struct device_attribute *fnic_attrs[]; + +void fnic_clear_intr_mode(struct fnic *fnic); +int fnic_set_intr_mode(struct fnic *fnic); +void fnic_free_intr(struct fnic *fnic); +int fnic_request_intr(struct fnic *fnic); + +int fnic_send(struct fc_lport *, struct fc_frame *); +void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf); +void fnic_handle_frame(struct work_struct *work); +void fnic_handle_link(struct work_struct *work); +int fnic_rq_cmpl_handler(struct fnic *fnic, int); +int fnic_alloc_rq_frame(struct vnic_rq *rq); +void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); +int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp); + +int fnic_queuecommand(struct scsi_cmnd *, void (*done)(struct scsi_cmnd *)); +int fnic_abort_cmd(struct scsi_cmnd *); +int fnic_device_reset(struct scsi_cmnd *); +int fnic_host_reset(struct scsi_cmnd *); +int fnic_reset(struct Scsi_Host *); +void fnic_scsi_cleanup(struct fc_lport *); +void fnic_scsi_abort_io(struct fc_lport *); +void fnic_empty_scsi_cleanup(struct fc_lport *); +void fnic_exch_mgr_reset(struct fc_lport *, u32, u32); +int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int); +int fnic_wq_cmpl_handler(struct fnic *fnic, int); +int fnic_flogi_reg_handler(struct fnic *fnic); +void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, + struct fcpio_host_req *desc); +int fnic_fw_reset_handler(struct fnic *fnic); +void fnic_terminate_rport_io(struct fc_rport *); +const char *fnic_state_to_str(unsigned int state); + +void fnic_log_q_error(struct fnic *fnic); +void fnic_handle_link_event(struct fnic *fnic); + +#endif /* _FNIC_H_ */ diff --git a/drivers/scsi/fnic/fnic_attrs.c b/drivers/scsi/fnic/fnic_attrs.c new file mode 100644 index 000000000000..aea0c3becfd4 --- /dev/null +++ b/drivers/scsi/fnic/fnic_attrs.c @@ -0,0 +1,56 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/string.h> +#include <linux/device.h> +#include <scsi/scsi_host.h> +#include "fnic.h" + +static ssize_t fnic_show_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lp = shost_priv(class_to_shost(dev)); + struct fnic *fnic = lport_priv(lp); + + return snprintf(buf, PAGE_SIZE, "%s\n", fnic_state_str[fnic->state]); +} + +static ssize_t fnic_show_drv_version(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); +} + +static ssize_t fnic_show_link_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lp = shost_priv(class_to_shost(dev)); + + return snprintf(buf, PAGE_SIZE, "%s\n", (lp->link_up) + ? "Link Up" : "Link Down"); +} + +static DEVICE_ATTR(fnic_state, S_IRUGO, fnic_show_state, NULL); +static DEVICE_ATTR(drv_version, S_IRUGO, fnic_show_drv_version, NULL); +static DEVICE_ATTR(link_state, S_IRUGO, fnic_show_link_state, NULL); + +struct device_attribute *fnic_attrs[] = { + &dev_attr_fnic_state, + &dev_attr_drv_version, + &dev_attr_link_state, + NULL, +}; diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c new file mode 100644 index 000000000000..07e6eedb83ce --- /dev/null +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -0,0 +1,742 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/workqueue.h> +#include <scsi/fc/fc_els.h> +#include <scsi/fc/fc_fcoe.h> +#include <scsi/fc_frame.h> +#include <scsi/libfc.h> +#include "fnic_io.h" +#include "fnic.h" +#include "cq_enet_desc.h" +#include "cq_exch_desc.h" + +struct workqueue_struct *fnic_event_queue; + +void fnic_handle_link(struct work_struct *work) +{ + struct fnic *fnic = container_of(work, struct fnic, link_work); + unsigned long flags; + int old_link_status; + u32 old_link_down_cnt; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + + old_link_down_cnt = fnic->link_down_cnt; + old_link_status = fnic->link_status; + fnic->link_status = vnic_dev_link_status(fnic->vdev); + fnic->link_down_cnt = vnic_dev_link_down_cnt(fnic->vdev); + + if (old_link_status == fnic->link_status) { + if (!fnic->link_status) + /* DOWN -> DOWN */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + else { + if (old_link_down_cnt != fnic->link_down_cnt) { + /* UP -> DOWN -> UP */ + fnic->lport->host_stats.link_failure_count++; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "link down\n"); + fc_linkdown(fnic->lport); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "link up\n"); + fc_linkup(fnic->lport); + } else + /* UP -> UP */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + } + } else if (fnic->link_status) { + /* DOWN -> UP */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link up\n"); + fc_linkup(fnic->lport); + } else { + /* UP -> DOWN */ + fnic->lport->host_stats.link_failure_count++; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, "link down\n"); + fc_linkdown(fnic->lport); + } + +} + +/* + * This function passes incoming fabric frames to libFC + */ +void fnic_handle_frame(struct work_struct *work) +{ + struct fnic *fnic = container_of(work, struct fnic, frame_work); + struct fc_lport *lp = fnic->lport; + unsigned long flags; + struct sk_buff *skb; + struct fc_frame *fp; + + while ((skb = skb_dequeue(&fnic->frame_queue))) { + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb(skb); + return; + } + fp = (struct fc_frame *)skb; + /* if Flogi resp frame, register the address */ + if (fr_flags(fp)) { + vnic_dev_add_addr(fnic->vdev, + fnic->data_src_addr); + fr_flags(fp) = 0; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + fc_exch_recv(lp, lp->emp, fp); + } + +} + +static inline void fnic_import_rq_fc_frame(struct sk_buff *skb, + u32 len, u8 sof, u8 eof) +{ + struct fc_frame *fp = (struct fc_frame *)skb; + + skb_trim(skb, len); + fr_eof(fp) = eof; + fr_sof(fp) = sof; +} + + +static inline int fnic_import_rq_eth_pkt(struct sk_buff *skb, u32 len) +{ + struct fc_frame *fp; + struct ethhdr *eh; + struct vlan_ethhdr *vh; + struct fcoe_hdr *fcoe_hdr; + struct fcoe_crc_eof *ft; + u32 transport_len = 0; + + eh = (struct ethhdr *)skb->data; + vh = (struct vlan_ethhdr *)skb->data; + if (vh->h_vlan_proto == htons(ETH_P_8021Q) && + vh->h_vlan_encapsulated_proto == htons(ETH_P_FCOE)) { + skb_pull(skb, sizeof(struct vlan_ethhdr)); + transport_len += sizeof(struct vlan_ethhdr); + } else if (eh->h_proto == htons(ETH_P_FCOE)) { + transport_len += sizeof(struct ethhdr); + skb_pull(skb, sizeof(struct ethhdr)); + } else + return -1; + + fcoe_hdr = (struct fcoe_hdr *)skb->data; + if (FC_FCOE_DECAPS_VER(fcoe_hdr) != FC_FCOE_VER) + return -1; + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); + fr_sof(fp) = fcoe_hdr->fcoe_sof; + skb_pull(skb, sizeof(struct fcoe_hdr)); + transport_len += sizeof(struct fcoe_hdr); + + ft = (struct fcoe_crc_eof *)(skb->data + len - + transport_len - sizeof(*ft)); + fr_eof(fp) = ft->fcoe_eof; + skb_trim(skb, len - transport_len - sizeof(*ft)); + return 0; +} + +static inline int fnic_handle_flogi_resp(struct fnic *fnic, + struct fc_frame *fp) +{ + u8 mac[ETH_ALEN] = FC_FCOE_FLOGI_MAC; + struct ethhdr *eth_hdr; + struct fc_frame_header *fh; + int ret = 0; + unsigned long flags; + struct fc_frame *old_flogi_resp = NULL; + + fh = (struct fc_frame_header *)fr_hdr(fp); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + if (fnic->state == FNIC_IN_ETH_MODE) { + + /* + * Check if oxid matches on taking the lock. A new Flogi + * issued by libFC might have changed the fnic cached oxid + */ + if (fnic->flogi_oxid != ntohs(fh->fh_ox_id)) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Flogi response oxid not" + " matching cached oxid, dropping frame" + "\n"); + ret = -1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb_irq(fp_skb(fp)); + goto handle_flogi_resp_end; + } + + /* Drop older cached flogi response frame, cache this frame */ + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = fp; + fnic->flogi_oxid = FC_XID_UNKNOWN; + + /* + * this frame is part of flogi get the src mac addr from this + * frame if the src mac is fcoui based then we mark the + * address mode flag to use fcoui base for dst mac addr + * otherwise we have to store the fcoe gateway addr + */ + eth_hdr = (struct ethhdr *)skb_mac_header(fp_skb(fp)); + memcpy(mac, eth_hdr->h_source, ETH_ALEN); + + if (ntoh24(mac) == FC_FCOE_OUI) + fnic->fcoui_mode = 1; + else { + fnic->fcoui_mode = 0; + memcpy(fnic->dest_addr, mac, ETH_ALEN); + } + + /* + * Except for Flogi frame, all outbound frames from us have the + * Eth Src address as FC_FCOE_OUI"our_sid". Flogi frame uses + * the vnic MAC address as the Eth Src address + */ + fc_fcoe_set_mac(fnic->data_src_addr, fh->fh_d_id); + + /* We get our s_id from the d_id of the flogi resp frame */ + fnic->s_id = ntoh24(fh->fh_d_id); + + /* Change state to reflect transition from Eth to FC mode */ + fnic->state = FNIC_IN_ETH_TRANS_FC_MODE; + + } else { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Unexpected fnic state %s while" + " processing flogi resp\n", + fnic_state_to_str(fnic->state)); + ret = -1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb_irq(fp_skb(fp)); + goto handle_flogi_resp_end; + } + + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + /* Drop older cached frame */ + if (old_flogi_resp) + dev_kfree_skb_irq(fp_skb(old_flogi_resp)); + + /* + * send flogi reg request to firmware, this will put the fnic in + * in FC mode + */ + ret = fnic_flogi_reg_handler(fnic); + + if (ret < 0) { + int free_fp = 1; + spin_lock_irqsave(&fnic->fnic_lock, flags); + /* + * free the frame is some other thread is not + * pointing to it + */ + if (fnic->flogi_resp != fp) + free_fp = 0; + else + fnic->flogi_resp = NULL; + + if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) + fnic->state = FNIC_IN_ETH_MODE; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (free_fp) + dev_kfree_skb_irq(fp_skb(fp)); + } + + handle_flogi_resp_end: + return ret; +} + +/* Returns 1 for a response that matches cached flogi oxid */ +static inline int is_matching_flogi_resp_frame(struct fnic *fnic, + struct fc_frame *fp) +{ + struct fc_frame_header *fh; + int ret = 0; + u32 f_ctl; + + fh = fc_frame_header_get(fp); + f_ctl = ntoh24(fh->fh_f_ctl); + + if (fnic->flogi_oxid == ntohs(fh->fh_ox_id) && + fh->fh_r_ctl == FC_RCTL_ELS_REP && + (f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) == FC_FC_EX_CTX && + fh->fh_type == FC_TYPE_ELS) + ret = 1; + + return ret; +} + +static void fnic_rq_cmpl_frame_recv(struct vnic_rq *rq, struct cq_desc + *cq_desc, struct vnic_rq_buf *buf, + int skipped __attribute__((unused)), + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + struct fc_frame *fp; + unsigned int eth_hdrs_stripped; + u8 type, color, eop, sop, ingress_port, vlan_stripped; + u8 fcoe = 0, fcoe_sof, fcoe_eof; + u8 fcoe_fc_crc_ok = 1, fcoe_enc_error = 0; + u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok; + u8 ipv6, ipv4, ipv4_fragment, rss_type, csum_not_calc; + u8 fcs_ok = 1, packet_error = 0; + u16 q_number, completed_index, bytes_written = 0, vlan, checksum; + u32 rss_hash; + u16 exchange_id, tmpl; + u8 sof = 0; + u8 eof = 0; + u32 fcp_bytes_written = 0; + unsigned long flags; + + pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); + skb = buf->os_buf; + buf->os_buf = NULL; + + cq_desc_dec(cq_desc, &type, &color, &q_number, &completed_index); + if (type == CQ_DESC_TYPE_RQ_FCP) { + cq_fcp_rq_desc_dec((struct cq_fcp_rq_desc *)cq_desc, + &type, &color, &q_number, &completed_index, + &eop, &sop, &fcoe_fc_crc_ok, &exchange_id, + &tmpl, &fcp_bytes_written, &sof, &eof, + &ingress_port, &packet_error, + &fcoe_enc_error, &fcs_ok, &vlan_stripped, + &vlan); + eth_hdrs_stripped = 1; + + } else if (type == CQ_DESC_TYPE_RQ_ENET) { + cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc, + &type, &color, &q_number, &completed_index, + &ingress_port, &fcoe, &eop, &sop, + &rss_type, &csum_not_calc, &rss_hash, + &bytes_written, &packet_error, + &vlan_stripped, &vlan, &checksum, + &fcoe_sof, &fcoe_fc_crc_ok, + &fcoe_enc_error, &fcoe_eof, + &tcp_udp_csum_ok, &udp, &tcp, + &ipv4_csum_ok, &ipv6, &ipv4, + &ipv4_fragment, &fcs_ok); + eth_hdrs_stripped = 0; + + } else { + /* wrong CQ type*/ + shost_printk(KERN_ERR, fnic->lport->host, + "fnic rq_cmpl wrong cq type x%x\n", type); + goto drop; + } + + if (!fcs_ok || packet_error || !fcoe_fc_crc_ok || fcoe_enc_error) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "fnic rq_cmpl fcoe x%x fcsok x%x" + " pkterr x%x fcoe_fc_crc_ok x%x, fcoe_enc_err" + " x%x\n", + fcoe, fcs_ok, packet_error, + fcoe_fc_crc_ok, fcoe_enc_error); + goto drop; + } + + if (eth_hdrs_stripped) + fnic_import_rq_fc_frame(skb, fcp_bytes_written, sof, eof); + else if (fnic_import_rq_eth_pkt(skb, bytes_written)) + goto drop; + + fp = (struct fc_frame *)skb; + + /* + * If frame is an ELS response that matches the cached FLOGI OX_ID, + * and is accept, issue flogi_reg_request copy wq request to firmware + * to register the S_ID and determine whether FC_OUI mode or GW mode. + */ + if (is_matching_flogi_resp_frame(fnic, fp)) { + if (!eth_hdrs_stripped) { + if (fc_frame_payload_op(fp) == ELS_LS_ACC) { + fnic_handle_flogi_resp(fnic, fp); + return; + } + /* + * Recd. Flogi reject. No point registering + * with fw, but forward to libFC + */ + goto forward; + } + goto drop; + } + if (!eth_hdrs_stripped) + goto drop; + +forward: + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + goto drop; + } + /* Use fr_flags to indicate whether succ. flogi resp or not */ + fr_flags(fp) = 0; + fr_dev(fp) = fnic->lport; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + skb_queue_tail(&fnic->frame_queue, skb); + queue_work(fnic_event_queue, &fnic->frame_work); + + return; +drop: + dev_kfree_skb_irq(skb); +} + +static int fnic_rq_cmpl_handler_cont(struct vnic_dev *vdev, + struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + + vnic_rq_service(&fnic->rq[q_number], cq_desc, completed_index, + VNIC_RQ_RETURN_DESC, fnic_rq_cmpl_frame_recv, + NULL); + return 0; +} + +int fnic_rq_cmpl_handler(struct fnic *fnic, int rq_work_to_do) +{ + unsigned int tot_rq_work_done = 0, cur_work_done; + unsigned int i; + int err; + + for (i = 0; i < fnic->rq_count; i++) { + cur_work_done = vnic_cq_service(&fnic->cq[i], rq_work_to_do, + fnic_rq_cmpl_handler_cont, + NULL); + if (cur_work_done) { + err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame); + if (err) + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_alloc_rq_frame cant alloc" + " frame\n"); + } + tot_rq_work_done += cur_work_done; + } + + return tot_rq_work_done; +} + +/* + * This function is called once at init time to allocate and fill RQ + * buffers. Subsequently, it is called in the interrupt context after RQ + * buffer processing to replenish the buffers in the RQ + */ +int fnic_alloc_rq_frame(struct vnic_rq *rq) +{ + struct fnic *fnic = vnic_dev_priv(rq->vdev); + struct sk_buff *skb; + u16 len; + dma_addr_t pa; + + len = FC_FRAME_HEADROOM + FC_MAX_FRAME + FC_FRAME_TAILROOM; + skb = dev_alloc_skb(len); + if (!skb) { + FNIC_FCS_DBG(KERN_DEBUG, fnic->lport->host, + "Unable to allocate RQ sk_buff\n"); + return -ENOMEM; + } + skb_reset_mac_header(skb); + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + skb_put(skb, len); + pa = pci_map_single(fnic->pdev, skb->data, len, PCI_DMA_FROMDEVICE); + fnic_queue_rq_desc(rq, skb, pa, len); + return 0; +} + +void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf) +{ + struct fc_frame *fp = buf->os_buf; + struct fnic *fnic = vnic_dev_priv(rq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, buf->len, + PCI_DMA_FROMDEVICE); + + dev_kfree_skb(fp_skb(fp)); + buf->os_buf = NULL; +} + +static inline int is_flogi_frame(struct fc_frame_header *fh) +{ + return fh->fh_r_ctl == FC_RCTL_ELS_REQ && *(u8 *)(fh + 1) == ELS_FLOGI; +} + +int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) +{ + struct vnic_wq *wq = &fnic->wq[0]; + struct sk_buff *skb; + dma_addr_t pa; + struct ethhdr *eth_hdr; + struct vlan_ethhdr *vlan_hdr; + struct fcoe_hdr *fcoe_hdr; + struct fc_frame_header *fh; + u32 tot_len, eth_hdr_len; + int ret = 0; + unsigned long flags; + + fh = fc_frame_header_get(fp); + skb = fp_skb(fp); + + if (!fnic->vlan_hw_insert) { + eth_hdr_len = sizeof(*vlan_hdr) + sizeof(*fcoe_hdr); + vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, eth_hdr_len); + eth_hdr = (struct ethhdr *)vlan_hdr; + vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_hdr->h_vlan_encapsulated_proto = htons(ETH_P_FCOE); + vlan_hdr->h_vlan_TCI = htons(fnic->vlan_id); + fcoe_hdr = (struct fcoe_hdr *)(vlan_hdr + 1); + } else { + eth_hdr_len = sizeof(*eth_hdr) + sizeof(*fcoe_hdr); + eth_hdr = (struct ethhdr *)skb_push(skb, eth_hdr_len); + eth_hdr->h_proto = htons(ETH_P_FCOE); + fcoe_hdr = (struct fcoe_hdr *)(eth_hdr + 1); + } + + if (is_flogi_frame(fh)) { + fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id); + memcpy(eth_hdr->h_source, fnic->mac_addr, ETH_ALEN); + } else { + if (fnic->fcoui_mode) + fc_fcoe_set_mac(eth_hdr->h_dest, fh->fh_d_id); + else + memcpy(eth_hdr->h_dest, fnic->dest_addr, ETH_ALEN); + memcpy(eth_hdr->h_source, fnic->data_src_addr, ETH_ALEN); + } + + tot_len = skb->len; + BUG_ON(tot_len % 4); + + memset(fcoe_hdr, 0, sizeof(*fcoe_hdr)); + fcoe_hdr->fcoe_sof = fr_sof(fp); + if (FC_FCOE_VER) + FC_FCOE_ENCAPS_VER(fcoe_hdr, FC_FCOE_VER); + + pa = pci_map_single(fnic->pdev, eth_hdr, tot_len, PCI_DMA_TODEVICE); + + spin_lock_irqsave(&fnic->wq_lock[0], flags); + + if (!vnic_wq_desc_avail(wq)) { + pci_unmap_single(fnic->pdev, pa, + tot_len, PCI_DMA_TODEVICE); + ret = -1; + goto fnic_send_frame_end; + } + + fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), + fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); +fnic_send_frame_end: + spin_unlock_irqrestore(&fnic->wq_lock[0], flags); + + if (ret) + dev_kfree_skb_any(fp_skb(fp)); + + return ret; +} + +/* + * fnic_send + * Routine to send a raw frame + */ +int fnic_send(struct fc_lport *lp, struct fc_frame *fp) +{ + struct fnic *fnic = lport_priv(lp); + struct fc_frame_header *fh; + int ret = 0; + enum fnic_state old_state; + unsigned long flags; + struct fc_frame *old_flogi = NULL; + struct fc_frame *old_flogi_resp = NULL; + + if (fnic->in_remove) { + dev_kfree_skb(fp_skb(fp)); + ret = -1; + goto fnic_send_end; + } + + fh = fc_frame_header_get(fp); + /* if not an Flogi frame, send it out, this is the common case */ + if (!is_flogi_frame(fh)) + return fnic_send_frame(fnic, fp); + + /* Flogi frame, now enter the state machine */ + + spin_lock_irqsave(&fnic->fnic_lock, flags); +again: + /* Get any old cached frames, free them after dropping lock */ + old_flogi = fnic->flogi; + fnic->flogi = NULL; + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + + fnic->flogi_oxid = FC_XID_UNKNOWN; + + old_state = fnic->state; + switch (old_state) { + case FNIC_IN_FC_MODE: + case FNIC_IN_ETH_TRANS_FC_MODE: + default: + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (old_flogi) { + dev_kfree_skb(fp_skb(old_flogi)); + old_flogi = NULL; + } + if (old_flogi_resp) { + dev_kfree_skb(fp_skb(old_flogi_resp)); + old_flogi_resp = NULL; + } + + ret = fnic_fw_reset_handler(fnic); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state != FNIC_IN_FC_TRANS_ETH_MODE) + goto again; + if (ret) { + fnic->state = old_state; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + dev_kfree_skb(fp_skb(fp)); + goto fnic_send_end; + } + old_flogi = fnic->flogi; + fnic->flogi = fp; + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + old_flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + break; + + case FNIC_IN_FC_TRANS_ETH_MODE: + /* + * A reset is pending with the firmware. Store the flogi + * and its oxid. The transition out of this state happens + * only when Firmware completes the reset, either with + * success or failed. If success, transition to + * FNIC_IN_ETH_MODE, if fail, then transition to + * FNIC_IN_FC_MODE + */ + fnic->flogi = fp; + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + break; + + case FNIC_IN_ETH_MODE: + /* + * The fw/hw is already in eth mode. Store the oxid, + * and send the flogi frame out. The transition out of this + * state happens only we receive flogi response from the + * network, and the oxid matches the cached oxid when the + * flogi frame was sent out. If they match, then we issue + * a flogi_reg request and transition to state + * FNIC_IN_ETH_TRANS_FC_MODE + */ + fnic->flogi_oxid = ntohs(fh->fh_ox_id); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + ret = fnic_send_frame(fnic, fp); + break; + } + +fnic_send_end: + if (old_flogi) + dev_kfree_skb(fp_skb(old_flogi)); + if (old_flogi_resp) + dev_kfree_skb(fp_skb(old_flogi_resp)); + return ret; +} + +static void fnic_wq_complete_frame_send(struct vnic_wq *wq, + struct cq_desc *cq_desc, + struct vnic_wq_buf *buf, void *opaque) +{ + struct sk_buff *skb = buf->os_buf; + struct fc_frame *fp = (struct fc_frame *)skb; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, + buf->len, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(fp_skb(fp)); + buf->os_buf = NULL; +} + +static int fnic_wq_cmpl_handler_cont(struct vnic_dev *vdev, + struct cq_desc *cq_desc, u8 type, + u16 q_number, u16 completed_index, + void *opaque) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_lock[q_number], flags); + vnic_wq_service(&fnic->wq[q_number], cq_desc, completed_index, + fnic_wq_complete_frame_send, NULL); + spin_unlock_irqrestore(&fnic->wq_lock[q_number], flags); + + return 0; +} + +int fnic_wq_cmpl_handler(struct fnic *fnic, int work_to_do) +{ + unsigned int wq_work_done = 0; + unsigned int i; + + for (i = 0; i < fnic->raw_wq_count; i++) { + wq_work_done += vnic_cq_service(&fnic->cq[fnic->rq_count+i], + work_to_do, + fnic_wq_cmpl_handler_cont, + NULL); + } + + return wq_work_done; +} + + +void fnic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf) +{ + struct fc_frame *fp = buf->os_buf; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + + pci_unmap_single(fnic->pdev, buf->dma_addr, + buf->len, PCI_DMA_TODEVICE); + + dev_kfree_skb(fp_skb(fp)); + buf->os_buf = NULL; +} diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h new file mode 100644 index 000000000000..f0b896988cd5 --- /dev/null +++ b/drivers/scsi/fnic/fnic_io.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_IO_H_ +#define _FNIC_IO_H_ + +#include <scsi/fc/fc_fcp.h> + +#define FNIC_DFLT_SG_DESC_CNT 32 +#define FNIC_MAX_SG_DESC_CNT 1024 /* Maximum descriptors per sgl */ +#define FNIC_SG_DESC_ALIGN 16 /* Descriptor address alignment */ + +struct host_sg_desc { + __le64 addr; + __le32 len; + u32 _resvd; +}; + +struct fnic_dflt_sgl_list { + struct host_sg_desc sg_desc[FNIC_DFLT_SG_DESC_CNT]; +}; + +struct fnic_sgl_list { + struct host_sg_desc sg_desc[FNIC_MAX_SG_DESC_CNT]; +}; + +enum fnic_sgl_list_type { + FNIC_SGL_CACHE_DFLT = 0, /* cache with default size sgl */ + FNIC_SGL_CACHE_MAX, /* cache with max size sgl */ + FNIC_SGL_NUM_CACHES /* number of sgl caches */ +}; + +enum fnic_ioreq_state { + FNIC_IOREQ_CMD_PENDING = 0, + FNIC_IOREQ_ABTS_PENDING, + FNIC_IOREQ_ABTS_COMPLETE, + FNIC_IOREQ_CMD_COMPLETE, +}; + +struct fnic_io_req { + struct host_sg_desc *sgl_list; /* sgl list */ + void *sgl_list_alloc; /* sgl list address used for free */ + dma_addr_t sense_buf_pa; /* dma address for sense buffer*/ + dma_addr_t sgl_list_pa; /* dma address for sgl list */ + u16 sgl_cnt; + u8 sgl_type; /* device DMA descriptor list type */ + u8 io_completed:1; /* set to 1 when fw completes IO */ + u32 port_id; /* remote port DID */ + struct completion *abts_done; /* completion for abts */ + struct completion *dr_done; /* completion for device reset */ +}; + +#endif /* _FNIC_IO_H_ */ diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c new file mode 100644 index 000000000000..2b3064828aea --- /dev/null +++ b/drivers/scsi/fnic/fnic_isr.c @@ -0,0 +1,332 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <scsi/libfc.h> +#include <scsi/fc_frame.h> +#include "vnic_dev.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "fnic_io.h" +#include "fnic.h" + +static irqreturn_t fnic_isr_legacy(int irq, void *data) +{ + struct fnic *fnic = data; + u32 pba; + unsigned long work_done = 0; + + pba = vnic_intr_legacy_pba(fnic->legacy_pba); + if (!pba) + return IRQ_NONE; + + if (pba & (1 << FNIC_INTX_NOTIFY)) { + vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_NOTIFY]); + fnic_handle_link_event(fnic); + } + + if (pba & (1 << FNIC_INTX_ERR)) { + vnic_intr_return_all_credits(&fnic->intr[FNIC_INTX_ERR]); + fnic_log_q_error(fnic); + } + + if (pba & (1 << FNIC_INTX_WQ_RQ_COPYWQ)) { + work_done += fnic_wq_copy_cmpl_handler(fnic, 8); + work_done += fnic_wq_cmpl_handler(fnic, 4); + work_done += fnic_rq_cmpl_handler(fnic, 4); + + vnic_intr_return_credits(&fnic->intr[FNIC_INTX_WQ_RQ_COPYWQ], + work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + } + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msi(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long work_done = 0; + + work_done += fnic_wq_copy_cmpl_handler(fnic, 8); + work_done += fnic_wq_cmpl_handler(fnic, 4); + work_done += fnic_rq_cmpl_handler(fnic, 4); + + vnic_intr_return_credits(&fnic->intr[0], + work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_rq(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long rq_work_done = 0; + + rq_work_done = fnic_rq_cmpl_handler(fnic, 4); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_RQ], + rq_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_wq(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long wq_work_done = 0; + + wq_work_done = fnic_wq_cmpl_handler(fnic, 4); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ], + wq_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_wq_copy(int irq, void *data) +{ + struct fnic *fnic = data; + unsigned long wq_copy_work_done = 0; + + wq_copy_work_done = fnic_wq_copy_cmpl_handler(fnic, 8); + vnic_intr_return_credits(&fnic->intr[FNIC_MSIX_WQ_COPY], + wq_copy_work_done, + 1 /* unmask intr */, + 1 /* reset intr timer */); + return IRQ_HANDLED; +} + +static irqreturn_t fnic_isr_msix_err_notify(int irq, void *data) +{ + struct fnic *fnic = data; + + vnic_intr_return_all_credits(&fnic->intr[FNIC_MSIX_ERR_NOTIFY]); + fnic_log_q_error(fnic); + fnic_handle_link_event(fnic); + + return IRQ_HANDLED; +} + +void fnic_free_intr(struct fnic *fnic) +{ + int i; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSI: + free_irq(fnic->pdev->irq, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSIX: + for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) + if (fnic->msix[i].requested) + free_irq(fnic->msix_entry[i].vector, + fnic->msix[i].devid); + break; + + default: + break; + } +} + +int fnic_request_intr(struct fnic *fnic) +{ + int err = 0; + int i; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + + case VNIC_DEV_INTR_MODE_INTX: + err = request_irq(fnic->pdev->irq, &fnic_isr_legacy, + IRQF_SHARED, DRV_NAME, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSI: + err = request_irq(fnic->pdev->irq, &fnic_isr_msi, + 0, fnic->name, fnic); + break; + + case VNIC_DEV_INTR_MODE_MSIX: + + sprintf(fnic->msix[FNIC_MSIX_RQ].devname, + "%.11s-fcs-rq", fnic->name); + fnic->msix[FNIC_MSIX_RQ].isr = fnic_isr_msix_rq; + fnic->msix[FNIC_MSIX_RQ].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_WQ].devname, + "%.11s-fcs-wq", fnic->name); + fnic->msix[FNIC_MSIX_WQ].isr = fnic_isr_msix_wq; + fnic->msix[FNIC_MSIX_WQ].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_WQ_COPY].devname, + "%.11s-scsi-wq", fnic->name); + fnic->msix[FNIC_MSIX_WQ_COPY].isr = fnic_isr_msix_wq_copy; + fnic->msix[FNIC_MSIX_WQ_COPY].devid = fnic; + + sprintf(fnic->msix[FNIC_MSIX_ERR_NOTIFY].devname, + "%.11s-err-notify", fnic->name); + fnic->msix[FNIC_MSIX_ERR_NOTIFY].isr = + fnic_isr_msix_err_notify; + fnic->msix[FNIC_MSIX_ERR_NOTIFY].devid = fnic; + + for (i = 0; i < ARRAY_SIZE(fnic->msix); i++) { + err = request_irq(fnic->msix_entry[i].vector, + fnic->msix[i].isr, 0, + fnic->msix[i].devname, + fnic->msix[i].devid); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "MSIX: request_irq" + " failed %d\n", err); + fnic_free_intr(fnic); + break; + } + fnic->msix[i].requested = 1; + } + break; + + default: + break; + } + + return err; +} + +int fnic_set_intr_mode(struct fnic *fnic) +{ + unsigned int n = ARRAY_SIZE(fnic->rq); + unsigned int m = ARRAY_SIZE(fnic->wq); + unsigned int o = ARRAY_SIZE(fnic->wq_copy); + unsigned int i; + + /* + * Set interrupt mode (INTx, MSI, MSI-X) depending + * system capabilities. + * + * Try MSI-X first + * + * We need n RQs, m WQs, o Copy WQs, n+m+o CQs, and n+m+o+1 INTRs + * (last INTR is used for WQ/RQ errors and notification area) + */ + + BUG_ON(ARRAY_SIZE(fnic->msix_entry) < n + m + o + 1); + for (i = 0; i < n + m + o + 1; i++) + fnic->msix_entry[i].entry = i; + + if (fnic->rq_count >= n && + fnic->raw_wq_count >= m && + fnic->wq_copy_count >= o && + fnic->cq_count >= n + m + o) { + if (!pci_enable_msix(fnic->pdev, fnic->msix_entry, + n + m + o + 1)) { + fnic->rq_count = n; + fnic->raw_wq_count = m; + fnic->wq_copy_count = o; + fnic->wq_count = m + o; + fnic->cq_count = n + m + o; + fnic->intr_count = n + m + o + 1; + fnic->err_intr_offset = FNIC_MSIX_ERR_NOTIFY; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using MSI-X Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, + VNIC_DEV_INTR_MODE_MSIX); + return 0; + } + } + + /* + * Next try MSI + * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 1 INTR + */ + if (fnic->rq_count >= 1 && + fnic->raw_wq_count >= 1 && + fnic->wq_copy_count >= 1 && + fnic->cq_count >= 3 && + fnic->intr_count >= 1 && + !pci_enable_msi(fnic->pdev)) { + + fnic->rq_count = 1; + fnic->raw_wq_count = 1; + fnic->wq_copy_count = 1; + fnic->wq_count = 2; + fnic->cq_count = 3; + fnic->intr_count = 1; + fnic->err_intr_offset = 0; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using MSI Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_MSI); + + return 0; + } + + /* + * Next try INTx + * We need 1 RQ, 1 WQ, 1 WQ_COPY, 3 CQs, and 3 INTRs + * 1 INTR is used for all 3 queues, 1 INTR for queue errors + * 1 INTR for notification area + */ + + if (fnic->rq_count >= 1 && + fnic->raw_wq_count >= 1 && + fnic->wq_copy_count >= 1 && + fnic->cq_count >= 3 && + fnic->intr_count >= 3) { + + fnic->rq_count = 1; + fnic->raw_wq_count = 1; + fnic->wq_copy_count = 1; + fnic->cq_count = 3; + fnic->intr_count = 3; + + FNIC_ISR_DBG(KERN_DEBUG, fnic->lport->host, + "Using Legacy Interrupts\n"); + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX); + + return 0; + } + + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); + + return -EINVAL; +} + +void fnic_clear_intr_mode(struct fnic *fnic) +{ + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_MSIX: + pci_disable_msix(fnic->pdev); + break; + case VNIC_DEV_INTR_MODE_MSI: + pci_disable_msi(fnic->pdev); + break; + default: + break; + } + + vnic_dev_set_intr_mode(fnic->vdev, VNIC_DEV_INTR_MODE_INTX); +} + diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c new file mode 100644 index 000000000000..32ef6b87d895 --- /dev/null +++ b/drivers/scsi/fnic/fnic_main.c @@ -0,0 +1,942 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/mempool.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_transport.h> +#include <scsi/scsi_transport_fc.h> +#include <scsi/scsi_tcq.h> +#include <scsi/libfc.h> +#include <scsi/fc_frame.h> + +#include "vnic_dev.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "fnic_io.h" +#include "fnic.h" + +#define PCI_DEVICE_ID_CISCO_FNIC 0x0045 + +/* Timer to poll notification area for events. Used for MSI interrupts */ +#define FNIC_NOTIFY_TIMER_PERIOD (2 * HZ) + +static struct kmem_cache *fnic_sgl_cache[FNIC_SGL_NUM_CACHES]; +static struct kmem_cache *fnic_io_req_cache; +LIST_HEAD(fnic_list); +DEFINE_SPINLOCK(fnic_list_lock); + +/* Supported devices by fnic module */ +static struct pci_device_id fnic_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_FNIC) }, + { 0, } +}; + +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("Abhijeet Joglekar <abjoglek@cisco.com>, " + "Joseph R. Eykholt <jeykholt@cisco.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, fnic_id_table); + +unsigned int fnic_log_level; +module_param(fnic_log_level, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels"); + + +static struct libfc_function_template fnic_transport_template = { + .frame_send = fnic_send, + .fcp_abort_io = fnic_empty_scsi_cleanup, + .fcp_cleanup = fnic_empty_scsi_cleanup, + .exch_mgr_reset = fnic_exch_mgr_reset +}; + +static int fnic_slave_alloc(struct scsi_device *sdev) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); + struct fc_lport *lp = shost_priv(sdev->host); + struct fnic *fnic = lport_priv(lp); + + sdev->tagged_supported = 1; + + if (!rport || fc_remote_port_chkready(rport)) + return -ENXIO; + + scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH); + rport->dev_loss_tmo = fnic->config.port_down_timeout / 1000; + + return 0; +} + +static struct scsi_host_template fnic_host_template = { + .module = THIS_MODULE, + .name = DRV_NAME, + .queuecommand = fnic_queuecommand, + .eh_abort_handler = fnic_abort_cmd, + .eh_device_reset_handler = fnic_device_reset, + .eh_host_reset_handler = fnic_host_reset, + .slave_alloc = fnic_slave_alloc, + .change_queue_depth = fc_change_queue_depth, + .change_queue_type = fc_change_queue_type, + .this_id = -1, + .cmd_per_lun = 3, + .can_queue = FNIC_MAX_IO_REQ, + .use_clustering = ENABLE_CLUSTERING, + .sg_tablesize = FNIC_MAX_SG_DESC_CNT, + .max_sectors = 0xffff, + .shost_attrs = fnic_attrs, +}; + +static void fnic_get_host_speed(struct Scsi_Host *shost); +static struct scsi_transport_template *fnic_fc_transport; +static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *); + +static struct fc_function_template fnic_fc_functions = { + + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fnic_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .show_rport_dev_loss_tmo = 1, + .issue_fc_host_lip = fnic_reset, + .get_fc_host_stats = fnic_get_stats, + .dd_fcrport_size = sizeof(struct fc_rport_libfc_priv), + .terminate_rport_io = fnic_terminate_rport_io, +}; + +static void fnic_get_host_speed(struct Scsi_Host *shost) +{ + struct fc_lport *lp = shost_priv(shost); + struct fnic *fnic = lport_priv(lp); + u32 port_speed = vnic_dev_port_speed(fnic->vdev); + + /* Add in other values as they get defined in fw */ + switch (port_speed) { + case 10000: + fc_host_speed(shost) = FC_PORTSPEED_10GBIT; + break; + default: + fc_host_speed(shost) = FC_PORTSPEED_10GBIT; + break; + } +} + +static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host) +{ + int ret; + struct fc_lport *lp = shost_priv(host); + struct fnic *fnic = lport_priv(lp); + struct fc_host_statistics *stats = &lp->host_stats; + struct vnic_stats *vs; + unsigned long flags; + + if (time_before(jiffies, fnic->stats_time + HZ / FNIC_STATS_RATE_LIMIT)) + return stats; + fnic->stats_time = jiffies; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + ret = vnic_dev_stats_dump(fnic->vdev, &fnic->stats); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (ret) { + FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host, + "fnic: Get vnic stats failed" + " 0x%x", ret); + return stats; + } + vs = fnic->stats; + stats->tx_frames = vs->tx.tx_unicast_frames_ok; + stats->tx_words = vs->tx.tx_unicast_bytes_ok / 4; + stats->rx_frames = vs->rx.rx_unicast_frames_ok; + stats->rx_words = vs->rx.rx_unicast_bytes_ok / 4; + stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors; + stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop; + stats->invalid_crc_count = vs->rx.rx_crc_errors; + stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ; + stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000); + stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000); + + return stats; +} + +void fnic_log_q_error(struct fnic *fnic) +{ + unsigned int i; + u32 error_status; + + for (i = 0; i < fnic->raw_wq_count; i++) { + error_status = ioread32(&fnic->wq[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "WQ[%d] error_status" + " %d\n", i, error_status); + } + + for (i = 0; i < fnic->rq_count; i++) { + error_status = ioread32(&fnic->rq[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "RQ[%d] error_status" + " %d\n", i, error_status); + } + + for (i = 0; i < fnic->wq_copy_count; i++) { + error_status = ioread32(&fnic->wq_copy[i].ctrl->error_status); + if (error_status) + shost_printk(KERN_ERR, fnic->lport->host, + "CWQ[%d] error_status" + " %d\n", i, error_status); + } +} + +void fnic_handle_link_event(struct fnic *fnic) +{ + unsigned long flags; + + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + queue_work(fnic_event_queue, &fnic->link_work); + +} + +static int fnic_notify_set(struct fnic *fnic) +{ + int err; + + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + err = vnic_dev_notify_set(fnic->vdev, FNIC_INTX_NOTIFY); + break; + case VNIC_DEV_INTR_MODE_MSI: + err = vnic_dev_notify_set(fnic->vdev, -1); + break; + case VNIC_DEV_INTR_MODE_MSIX: + err = vnic_dev_notify_set(fnic->vdev, FNIC_MSIX_ERR_NOTIFY); + break; + default: + shost_printk(KERN_ERR, fnic->lport->host, + "Interrupt mode should be set up" + " before devcmd notify set %d\n", + vnic_dev_get_intr_mode(fnic->vdev)); + err = -1; + break; + } + + return err; +} + +static void fnic_notify_timer(unsigned long data) +{ + struct fnic *fnic = (struct fnic *)data; + + fnic_handle_link_event(fnic); + mod_timer(&fnic->notify_timer, + round_jiffies(jiffies + FNIC_NOTIFY_TIMER_PERIOD)); +} + +static void fnic_notify_timer_start(struct fnic *fnic) +{ + switch (vnic_dev_get_intr_mode(fnic->vdev)) { + case VNIC_DEV_INTR_MODE_MSI: + /* + * Schedule first timeout immediately. The driver is + * initiatialized and ready to look for link up notification + */ + mod_timer(&fnic->notify_timer, jiffies); + break; + default: + /* Using intr for notification for INTx/MSI-X */ + break; + }; +} + +static int fnic_dev_wait(struct vnic_dev *vdev, + int (*start)(struct vnic_dev *, int), + int (*finished)(struct vnic_dev *, int *), + int arg) +{ + unsigned long time; + int done; + int err; + + err = start(vdev, arg); + if (err) + return err; + + /* Wait for func to complete...2 seconds max */ + time = jiffies + (HZ * 2); + do { + err = finished(vdev, &done); + if (err) + return err; + if (done) + return 0; + schedule_timeout_uninterruptible(HZ / 10); + } while (time_after(time, jiffies)); + + return -ETIMEDOUT; +} + +static int fnic_cleanup(struct fnic *fnic) +{ + unsigned int i; + int err; + unsigned long flags; + struct fc_frame *flogi = NULL; + struct fc_frame *flogi_resp = NULL; + + vnic_dev_disable(fnic->vdev); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_mask(&fnic->intr[i]); + + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_disable(&fnic->rq[i]); + if (err) + return err; + } + for (i = 0; i < fnic->raw_wq_count; i++) { + err = vnic_wq_disable(&fnic->wq[i]); + if (err) + return err; + } + for (i = 0; i < fnic->wq_copy_count; i++) { + err = vnic_wq_copy_disable(&fnic->wq_copy[i]); + if (err) + return err; + } + + /* Clean up completed IOs and FCS frames */ + fnic_wq_copy_cmpl_handler(fnic, -1); + fnic_wq_cmpl_handler(fnic, -1); + fnic_rq_cmpl_handler(fnic, -1); + + /* Clean up the IOs and FCS frames that have not completed */ + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_clean(&fnic->wq[i], fnic_free_wq_buf); + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf); + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_clean(&fnic->wq_copy[i], + fnic_wq_copy_cleanup_handler); + + for (i = 0; i < fnic->cq_count; i++) + vnic_cq_clean(&fnic->cq[i]); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_clean(&fnic->intr[i]); + + /* + * Remove cached flogi and flogi resp frames if any + * These frames are not in any queue, and therefore queue + * cleanup does not clean them. So clean them explicitly + */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + flogi = fnic->flogi; + fnic->flogi = NULL; + flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (flogi) + dev_kfree_skb(fp_skb(flogi)); + + if (flogi_resp) + dev_kfree_skb(fp_skb(flogi_resp)); + + mempool_destroy(fnic->io_req_pool); + for (i = 0; i < FNIC_SGL_NUM_CACHES; i++) + mempool_destroy(fnic->io_sgl_pool[i]); + + return 0; +} + +static void fnic_iounmap(struct fnic *fnic) +{ + if (fnic->bar0.vaddr) + iounmap(fnic->bar0.vaddr); +} + +/* + * Allocate element for mempools requiring GFP_DMA flag. + * Otherwise, checks in kmem_flagcheck() hit BUG_ON(). + */ +static void *fnic_alloc_slab_dma(gfp_t gfp_mask, void *pool_data) +{ + struct kmem_cache *mem = pool_data; + + return kmem_cache_alloc(mem, gfp_mask | GFP_ATOMIC | GFP_DMA); +} + +static int __devinit fnic_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct Scsi_Host *host; + struct fc_lport *lp; + struct fnic *fnic; + mempool_t *pool; + int err; + int i; + unsigned long flags; + + /* + * Allocate SCSI Host and set up association between host, + * local port, and fnic + */ + host = scsi_host_alloc(&fnic_host_template, + sizeof(struct fc_lport) + sizeof(struct fnic)); + if (!host) { + printk(KERN_ERR PFX "Unable to alloc SCSI host\n"); + err = -ENOMEM; + goto err_out; + } + lp = shost_priv(host); + lp->host = host; + fnic = lport_priv(lp); + fnic->lport = lp; + + snprintf(fnic->name, sizeof(fnic->name) - 1, "%s%d", DRV_NAME, + host->host_no); + + host->transportt = fnic_fc_transport; + + err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to alloc shared tag map\n"); + goto err_out_free_hba; + } + + /* Setup PCI resources */ + pci_set_drvdata(pdev, fnic); + + fnic->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot enable PCI device, aborting.\n"); + goto err_out_free_hba; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot enable PCI resources, aborting\n"); + goto err_out_disable_device; + } + + pci_set_master(pdev); + + /* Query PCI controller on system for DMA addressing + * limitation for the device. Try 40-bit first, and + * fail to 32-bit. + */ + err = pci_set_dma_mask(pdev, DMA_40BIT_MASK); + if (err) { + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "No usable DMA configuration " + "aborting\n"); + goto err_out_release_regions; + } + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to obtain 32-bit DMA " + "for consistent allocations, aborting.\n"); + goto err_out_release_regions; + } + } else { + err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to obtain 40-bit DMA " + "for consistent allocations, aborting.\n"); + goto err_out_release_regions; + } + } + + /* Map vNIC resources from BAR0 */ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + shost_printk(KERN_ERR, fnic->lport->host, + "BAR0 not memory-map'able, aborting.\n"); + err = -ENODEV; + goto err_out_release_regions; + } + + fnic->bar0.vaddr = pci_iomap(pdev, 0, 0); + fnic->bar0.bus_addr = pci_resource_start(pdev, 0); + fnic->bar0.len = pci_resource_len(pdev, 0); + + if (!fnic->bar0.vaddr) { + shost_printk(KERN_ERR, fnic->lport->host, + "Cannot memory-map BAR0 res hdr, " + "aborting.\n"); + err = -ENODEV; + goto err_out_release_regions; + } + + fnic->vdev = vnic_dev_register(NULL, fnic, pdev, &fnic->bar0); + if (!fnic->vdev) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC registration failed, " + "aborting.\n"); + err = -ENODEV; + goto err_out_iounmap; + } + + err = fnic_dev_wait(fnic->vdev, vnic_dev_open, + vnic_dev_open_done, 0); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC dev open failed, aborting.\n"); + goto err_out_vnic_unregister; + } + + err = vnic_dev_init(fnic->vdev, 0); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC dev init failed, aborting.\n"); + goto err_out_dev_close; + } + + err = vnic_dev_mac_addr(fnic->vdev, fnic->mac_addr); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vNIC get MAC addr failed \n"); + goto err_out_dev_close; + } + + /* Get vNIC configuration */ + err = fnic_get_vnic_config(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Get vNIC configuration failed, " + "aborting.\n"); + goto err_out_dev_close; + } + host->max_lun = fnic->config.luns_per_tgt; + host->max_id = FNIC_MAX_FCP_TARGET; + + fnic_get_res_counts(fnic); + + err = fnic_set_intr_mode(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to set intr mode, " + "aborting.\n"); + goto err_out_dev_close; + } + + err = fnic_request_intr(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Unable to request irq.\n"); + goto err_out_clear_intr; + } + + err = fnic_alloc_vnic_resources(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to alloc vNIC resources, " + "aborting.\n"); + goto err_out_free_intr; + } + + + /* initialize all fnic locks */ + spin_lock_init(&fnic->fnic_lock); + + for (i = 0; i < FNIC_WQ_MAX; i++) + spin_lock_init(&fnic->wq_lock[i]); + + for (i = 0; i < FNIC_WQ_COPY_MAX; i++) { + spin_lock_init(&fnic->wq_copy_lock[i]); + fnic->wq_copy_desc_low[i] = DESC_CLEAN_LOW_WATERMARK; + fnic->fw_ack_recd[i] = 0; + fnic->fw_ack_index[i] = -1; + } + + for (i = 0; i < FNIC_IO_LOCKS; i++) + spin_lock_init(&fnic->io_req_lock[i]); + + fnic->io_req_pool = mempool_create_slab_pool(2, fnic_io_req_cache); + if (!fnic->io_req_pool) + goto err_out_free_resources; + + pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab, + fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); + if (!pool) + goto err_out_free_ioreq_pool; + fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT] = pool; + + pool = mempool_create(2, fnic_alloc_slab_dma, mempool_free_slab, + fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); + if (!pool) + goto err_out_free_dflt_pool; + fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX] = pool; + + /* setup vlan config, hw inserts vlan header */ + fnic->vlan_hw_insert = 1; + fnic->vlan_id = 0; + + fnic->flogi_oxid = FC_XID_UNKNOWN; + fnic->flogi = NULL; + fnic->flogi_resp = NULL; + fnic->state = FNIC_IN_FC_MODE; + + /* Enable hardware stripping of vlan header on ingress */ + fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1); + + /* Setup notification buffer area */ + err = fnic_notify_set(fnic); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to alloc notify buffer, aborting.\n"); + goto err_out_free_max_pool; + } + + /* Setup notify timer when using MSI interrupts */ + if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI) + setup_timer(&fnic->notify_timer, + fnic_notify_timer, (unsigned long)fnic); + + /* allocate RQ buffers and post them to RQ*/ + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_fill(&fnic->rq[i], fnic_alloc_rq_frame); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_alloc_rq_frame can't alloc " + "frame\n"); + goto err_out_free_rq_buf; + } + } + + /* + * Initialization done with PCI system, hardware, firmware. + * Add host to SCSI + */ + err = scsi_add_host(lp->host, &pdev->dev); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic: scsi_add_host failed...exiting\n"); + goto err_out_free_rq_buf; + } + + /* Start local port initiatialization */ + + lp->link_up = 0; + lp->tt = fnic_transport_template; + + lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3, + FCPIO_HOST_EXCH_RANGE_START, + FCPIO_HOST_EXCH_RANGE_END); + if (!lp->emp) { + err = -ENOMEM; + goto err_out_remove_scsi_host; + } + + lp->max_retry_count = fnic->config.flogi_retries; + lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_CONF_COMPL); + if (fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) + lp->service_params |= FCP_SPPF_RETRY; + + lp->boot_time = jiffies; + lp->e_d_tov = fnic->config.ed_tov; + lp->r_a_tov = fnic->config.ra_tov; + lp->link_supported_speeds = FC_PORTSPEED_10GBIT; + fc_set_wwnn(lp, fnic->config.node_wwn); + fc_set_wwpn(lp, fnic->config.port_wwn); + + fc_exch_init(lp); + fc_lport_init(lp); + fc_elsct_init(lp); + fc_rport_init(lp); + fc_disc_init(lp); + + fc_lport_config(lp); + + if (fc_set_mfs(lp, fnic->config.maxdatafieldsize + + sizeof(struct fc_frame_header))) { + err = -EINVAL; + goto err_out_free_exch_mgr; + } + fc_host_maxframe_size(lp->host) = lp->mfs; + + sprintf(fc_host_symbolic_name(lp->host), + DRV_NAME " v" DRV_VERSION " over %s", fnic->name); + + spin_lock_irqsave(&fnic_list_lock, flags); + list_add_tail(&fnic->list, &fnic_list); + spin_unlock_irqrestore(&fnic_list_lock, flags); + + INIT_WORK(&fnic->link_work, fnic_handle_link); + INIT_WORK(&fnic->frame_work, fnic_handle_frame); + skb_queue_head_init(&fnic->frame_queue); + + /* Enable all queues */ + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_enable(&fnic->wq[i]); + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_enable(&fnic->rq[i]); + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_enable(&fnic->wq_copy[i]); + + fc_fabric_login(lp); + + vnic_dev_enable(fnic->vdev); + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_unmask(&fnic->intr[i]); + + fnic_notify_timer_start(fnic); + + return 0; + +err_out_free_exch_mgr: + fc_exch_mgr_free(lp->emp); +err_out_remove_scsi_host: + fc_remove_host(fnic->lport->host); + scsi_remove_host(fnic->lport->host); +err_out_free_rq_buf: + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_clean(&fnic->rq[i], fnic_free_rq_buf); + vnic_dev_notify_unset(fnic->vdev); +err_out_free_max_pool: + mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_MAX]); +err_out_free_dflt_pool: + mempool_destroy(fnic->io_sgl_pool[FNIC_SGL_CACHE_DFLT]); +err_out_free_ioreq_pool: + mempool_destroy(fnic->io_req_pool); +err_out_free_resources: + fnic_free_vnic_resources(fnic); +err_out_free_intr: + fnic_free_intr(fnic); +err_out_clear_intr: + fnic_clear_intr_mode(fnic); +err_out_dev_close: + vnic_dev_close(fnic->vdev); +err_out_vnic_unregister: + vnic_dev_unregister(fnic->vdev); +err_out_iounmap: + fnic_iounmap(fnic); +err_out_release_regions: + pci_release_regions(pdev); +err_out_disable_device: + pci_disable_device(pdev); +err_out_free_hba: + scsi_host_put(lp->host); +err_out: + return err; +} + +static void __devexit fnic_remove(struct pci_dev *pdev) +{ + struct fnic *fnic = pci_get_drvdata(pdev); + unsigned long flags; + + /* + * Mark state so that the workqueue thread stops forwarding + * received frames and link events to the local port. ISR and + * other threads that can queue work items will also stop + * creating work items on the fnic workqueue + */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->stop_rx_link_events = 1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (vnic_dev_get_intr_mode(fnic->vdev) == VNIC_DEV_INTR_MODE_MSI) + del_timer_sync(&fnic->notify_timer); + + /* + * Flush the fnic event queue. After this call, there should + * be no event queued for this fnic device in the workqueue + */ + flush_workqueue(fnic_event_queue); + skb_queue_purge(&fnic->frame_queue); + + /* + * Log off the fabric. This stops all remote ports, dns port, + * logs off the fabric. This flushes all rport, disc, lport work + * before returning + */ + fc_fabric_logoff(fnic->lport); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->in_remove = 1; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + fc_lport_destroy(fnic->lport); + + /* + * This stops the fnic device, masks all interrupts. Completed + * CQ entries are drained. Posted WQ/RQ/Copy-WQ entries are + * cleaned up + */ + fnic_cleanup(fnic); + + BUG_ON(!skb_queue_empty(&fnic->frame_queue)); + + spin_lock_irqsave(&fnic_list_lock, flags); + list_del(&fnic->list); + spin_unlock_irqrestore(&fnic_list_lock, flags); + + fc_remove_host(fnic->lport->host); + scsi_remove_host(fnic->lport->host); + fc_exch_mgr_free(fnic->lport->emp); + vnic_dev_notify_unset(fnic->vdev); + fnic_free_vnic_resources(fnic); + fnic_free_intr(fnic); + fnic_clear_intr_mode(fnic); + vnic_dev_close(fnic->vdev); + vnic_dev_unregister(fnic->vdev); + fnic_iounmap(fnic); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + scsi_host_put(fnic->lport->host); +} + +static struct pci_driver fnic_driver = { + .name = DRV_NAME, + .id_table = fnic_id_table, + .probe = fnic_probe, + .remove = __devexit_p(fnic_remove), +}; + +static int __init fnic_init_module(void) +{ + size_t len; + int err = 0; + + printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); + + /* Create a cache for allocation of default size sgls */ + len = sizeof(struct fnic_dflt_sgl_list); + fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create + ("fnic_sgl_dflt", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, + NULL); + if (!fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]) { + printk(KERN_ERR PFX "failed to create fnic dflt sgl slab\n"); + err = -ENOMEM; + goto err_create_fnic_sgl_slab_dflt; + } + + /* Create a cache for allocation of max size sgls*/ + len = sizeof(struct fnic_sgl_list); + fnic_sgl_cache[FNIC_SGL_CACHE_MAX] = kmem_cache_create + ("fnic_sgl_max", len + FNIC_SG_DESC_ALIGN, FNIC_SG_DESC_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, + NULL); + if (!fnic_sgl_cache[FNIC_SGL_CACHE_MAX]) { + printk(KERN_ERR PFX "failed to create fnic max sgl slab\n"); + err = -ENOMEM; + goto err_create_fnic_sgl_slab_max; + } + + /* Create a cache of io_req structs for use via mempool */ + fnic_io_req_cache = kmem_cache_create("fnic_io_req", + sizeof(struct fnic_io_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!fnic_io_req_cache) { + printk(KERN_ERR PFX "failed to create fnic io_req slab\n"); + err = -ENOMEM; + goto err_create_fnic_ioreq_slab; + } + + fnic_event_queue = create_singlethread_workqueue("fnic_event_wq"); + if (!fnic_event_queue) { + printk(KERN_ERR PFX "fnic work queue create failed\n"); + err = -ENOMEM; + goto err_create_fnic_workq; + } + + spin_lock_init(&fnic_list_lock); + INIT_LIST_HEAD(&fnic_list); + + fnic_fc_transport = fc_attach_transport(&fnic_fc_functions); + if (!fnic_fc_transport) { + printk(KERN_ERR PFX "fc_attach_transport error\n"); + err = -ENOMEM; + goto err_fc_transport; + } + + /* register the driver with PCI system */ + err = pci_register_driver(&fnic_driver); + if (err < 0) { + printk(KERN_ERR PFX "pci register error\n"); + goto err_pci_register; + } + return err; + +err_pci_register: + fc_release_transport(fnic_fc_transport); +err_fc_transport: + destroy_workqueue(fnic_event_queue); +err_create_fnic_workq: + kmem_cache_destroy(fnic_io_req_cache); +err_create_fnic_ioreq_slab: + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); +err_create_fnic_sgl_slab_max: + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); +err_create_fnic_sgl_slab_dflt: + return err; +} + +static void __exit fnic_cleanup_module(void) +{ + pci_unregister_driver(&fnic_driver); + destroy_workqueue(fnic_event_queue); + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_MAX]); + kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); + kmem_cache_destroy(fnic_io_req_cache); + fc_release_transport(fnic_fc_transport); +} + +module_init(fnic_init_module); +module_exit(fnic_cleanup_module); + diff --git a/drivers/scsi/fnic/fnic_res.c b/drivers/scsi/fnic/fnic_res.c new file mode 100644 index 000000000000..7ba61ec715d2 --- /dev/null +++ b/drivers/scsi/fnic/fnic_res.c @@ -0,0 +1,444 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "cq_enet_desc.h" +#include "vnic_resource.h" +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_stats.h" +#include "vnic_nic.h" +#include "fnic.h" + +int fnic_get_vnic_config(struct fnic *fnic) +{ + struct vnic_fc_config *c = &fnic->config; + int err; + +#define GET_CONFIG(m) \ + do { \ + err = vnic_dev_spec(fnic->vdev, \ + offsetof(struct vnic_fc_config, m), \ + sizeof(c->m), &c->m); \ + if (err) { \ + shost_printk(KERN_ERR, fnic->lport->host, \ + "Error getting %s, %d\n", #m, \ + err); \ + return err; \ + } \ + } while (0); + + GET_CONFIG(node_wwn); + GET_CONFIG(port_wwn); + GET_CONFIG(wq_enet_desc_count); + GET_CONFIG(wq_copy_desc_count); + GET_CONFIG(rq_desc_count); + GET_CONFIG(maxdatafieldsize); + GET_CONFIG(ed_tov); + GET_CONFIG(ra_tov); + GET_CONFIG(intr_timer); + GET_CONFIG(intr_timer_type); + GET_CONFIG(flags); + GET_CONFIG(flogi_retries); + GET_CONFIG(flogi_timeout); + GET_CONFIG(plogi_retries); + GET_CONFIG(plogi_timeout); + GET_CONFIG(io_throttle_count); + GET_CONFIG(link_down_timeout); + GET_CONFIG(port_down_timeout); + GET_CONFIG(port_down_io_retries); + GET_CONFIG(luns_per_tgt); + + c->wq_enet_desc_count = + min_t(u32, VNIC_FNIC_WQ_DESCS_MAX, + max_t(u32, VNIC_FNIC_WQ_DESCS_MIN, + c->wq_enet_desc_count)); + c->wq_enet_desc_count = ALIGN(c->wq_enet_desc_count, 16); + + c->wq_copy_desc_count = + min_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MAX, + max_t(u32, VNIC_FNIC_WQ_COPY_DESCS_MIN, + c->wq_copy_desc_count)); + c->wq_copy_desc_count = ALIGN(c->wq_copy_desc_count, 16); + + c->rq_desc_count = + min_t(u32, VNIC_FNIC_RQ_DESCS_MAX, + max_t(u32, VNIC_FNIC_RQ_DESCS_MIN, + c->rq_desc_count)); + c->rq_desc_count = ALIGN(c->rq_desc_count, 16); + + c->maxdatafieldsize = + min_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MAX, + max_t(u16, VNIC_FNIC_MAXDATAFIELDSIZE_MIN, + c->maxdatafieldsize)); + c->ed_tov = + min_t(u32, VNIC_FNIC_EDTOV_MAX, + max_t(u32, VNIC_FNIC_EDTOV_MIN, + c->ed_tov)); + + c->ra_tov = + min_t(u32, VNIC_FNIC_RATOV_MAX, + max_t(u32, VNIC_FNIC_RATOV_MIN, + c->ra_tov)); + + c->flogi_retries = + min_t(u32, VNIC_FNIC_FLOGI_RETRIES_MAX, c->flogi_retries); + + c->flogi_timeout = + min_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MAX, + max_t(u32, VNIC_FNIC_FLOGI_TIMEOUT_MIN, + c->flogi_timeout)); + + c->plogi_retries = + min_t(u32, VNIC_FNIC_PLOGI_RETRIES_MAX, c->plogi_retries); + + c->plogi_timeout = + min_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MAX, + max_t(u32, VNIC_FNIC_PLOGI_TIMEOUT_MIN, + c->plogi_timeout)); + + c->io_throttle_count = + min_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MAX, + max_t(u32, VNIC_FNIC_IO_THROTTLE_COUNT_MIN, + c->io_throttle_count)); + + c->link_down_timeout = + min_t(u32, VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX, + c->link_down_timeout); + + c->port_down_timeout = + min_t(u32, VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX, + c->port_down_timeout); + + c->port_down_io_retries = + min_t(u32, VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX, + c->port_down_io_retries); + + c->luns_per_tgt = + min_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MAX, + max_t(u32, VNIC_FNIC_LUNS_PER_TARGET_MIN, + c->luns_per_tgt)); + + c->intr_timer = min_t(u16, VNIC_INTR_TIMER_MAX, c->intr_timer); + c->intr_timer_type = c->intr_timer_type; + + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC MAC addr %02x:%02x:%02x:%02x:%02x:%02x " + "wq/wq_copy/rq %d/%d/%d\n", + fnic->mac_addr[0], fnic->mac_addr[1], fnic->mac_addr[2], + fnic->mac_addr[3], fnic->mac_addr[4], fnic->mac_addr[5], + c->wq_enet_desc_count, c->wq_copy_desc_count, + c->rq_desc_count); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC node wwn %llx port wwn %llx\n", + c->node_wwn, c->port_wwn); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC ed_tov %d ra_tov %d\n", + c->ed_tov, c->ra_tov); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC mtu %d intr timer %d\n", + c->maxdatafieldsize, c->intr_timer); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC flags 0x%x luns per tgt %d\n", + c->flags, c->luns_per_tgt); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC flogi_retries %d flogi timeout %d\n", + c->flogi_retries, c->flogi_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC plogi retries %d plogi timeout %d\n", + c->plogi_retries, c->plogi_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC io throttle count %d link dn timeout %d\n", + c->io_throttle_count, c->link_down_timeout); + shost_printk(KERN_INFO, fnic->lport->host, + "vNIC port dn io retries %d port dn timeout %d\n", + c->port_down_io_retries, c->port_down_timeout); + + return 0; +} + +int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu, + u8 rss_hash_type, + u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable, + u8 tso_ipid_split_en, u8 ig_vlan_strip_en) +{ + u64 a0, a1; + u32 nic_cfg; + int wait = 1000; + + vnic_set_nic_cfg(&nic_cfg, rss_default_cpu, + rss_hash_type, rss_hash_bits, rss_base_cpu, + rss_enable, tso_ipid_split_en, ig_vlan_strip_en); + + a0 = nic_cfg; + a1 = 0; + + return vnic_dev_cmd(fnic->vdev, CMD_NIC_CFG, &a0, &a1, wait); +} + +void fnic_get_res_counts(struct fnic *fnic) +{ + fnic->wq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_WQ); + fnic->raw_wq_count = fnic->wq_count - 1; + fnic->wq_copy_count = fnic->wq_count - fnic->raw_wq_count; + fnic->rq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_RQ); + fnic->cq_count = vnic_dev_get_res_count(fnic->vdev, RES_TYPE_CQ); + fnic->intr_count = vnic_dev_get_res_count(fnic->vdev, + RES_TYPE_INTR_CTRL); +} + +void fnic_free_vnic_resources(struct fnic *fnic) +{ + unsigned int i; + + for (i = 0; i < fnic->raw_wq_count; i++) + vnic_wq_free(&fnic->wq[i]); + + for (i = 0; i < fnic->wq_copy_count; i++) + vnic_wq_copy_free(&fnic->wq_copy[i]); + + for (i = 0; i < fnic->rq_count; i++) + vnic_rq_free(&fnic->rq[i]); + + for (i = 0; i < fnic->cq_count; i++) + vnic_cq_free(&fnic->cq[i]); + + for (i = 0; i < fnic->intr_count; i++) + vnic_intr_free(&fnic->intr[i]); +} + +int fnic_alloc_vnic_resources(struct fnic *fnic) +{ + enum vnic_dev_intr_mode intr_mode; + unsigned int mask_on_assertion; + unsigned int interrupt_offset; + unsigned int error_interrupt_enable; + unsigned int error_interrupt_offset; + unsigned int i, cq_index; + unsigned int wq_copy_cq_desc_count; + int err; + + intr_mode = vnic_dev_get_intr_mode(fnic->vdev); + + shost_printk(KERN_INFO, fnic->lport->host, "vNIC interrupt mode: %s\n", + intr_mode == VNIC_DEV_INTR_MODE_INTX ? "legacy PCI INTx" : + intr_mode == VNIC_DEV_INTR_MODE_MSI ? "MSI" : + intr_mode == VNIC_DEV_INTR_MODE_MSIX ? + "MSI-X" : "unknown"); + + shost_printk(KERN_INFO, fnic->lport->host, "vNIC resources avail: " + "wq %d cp_wq %d raw_wq %d rq %d cq %d intr %d\n", + fnic->wq_count, fnic->wq_copy_count, fnic->raw_wq_count, + fnic->rq_count, fnic->cq_count, fnic->intr_count); + + /* Allocate Raw WQ used for FCS frames */ + for (i = 0; i < fnic->raw_wq_count; i++) { + err = vnic_wq_alloc(fnic->vdev, &fnic->wq[i], i, + fnic->config.wq_enet_desc_count, + sizeof(struct wq_enet_desc)); + if (err) + goto err_out_cleanup; + } + + /* Allocate Copy WQs used for SCSI IOs */ + for (i = 0; i < fnic->wq_copy_count; i++) { + err = vnic_wq_copy_alloc(fnic->vdev, &fnic->wq_copy[i], + (fnic->raw_wq_count + i), + fnic->config.wq_copy_desc_count, + sizeof(struct fcpio_host_req)); + if (err) + goto err_out_cleanup; + } + + /* RQ for receiving FCS frames */ + for (i = 0; i < fnic->rq_count; i++) { + err = vnic_rq_alloc(fnic->vdev, &fnic->rq[i], i, + fnic->config.rq_desc_count, + sizeof(struct rq_enet_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each RQ */ + for (i = 0; i < fnic->rq_count; i++) { + cq_index = i; + err = vnic_cq_alloc(fnic->vdev, + &fnic->cq[cq_index], cq_index, + fnic->config.rq_desc_count, + sizeof(struct cq_enet_rq_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each WQ */ + for (i = 0; i < fnic->raw_wq_count; i++) { + cq_index = fnic->rq_count + i; + err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], cq_index, + fnic->config.wq_enet_desc_count, + sizeof(struct cq_enet_wq_desc)); + if (err) + goto err_out_cleanup; + } + + /* CQ for each COPY WQ */ + wq_copy_cq_desc_count = (fnic->config.wq_copy_desc_count * 3); + for (i = 0; i < fnic->wq_copy_count; i++) { + cq_index = fnic->raw_wq_count + fnic->rq_count + i; + err = vnic_cq_alloc(fnic->vdev, &fnic->cq[cq_index], + cq_index, + wq_copy_cq_desc_count, + sizeof(struct fcpio_fw_req)); + if (err) + goto err_out_cleanup; + } + + for (i = 0; i < fnic->intr_count; i++) { + err = vnic_intr_alloc(fnic->vdev, &fnic->intr[i], i); + if (err) + goto err_out_cleanup; + } + + fnic->legacy_pba = vnic_dev_get_res(fnic->vdev, + RES_TYPE_INTR_PBA_LEGACY, 0); + + if (!fnic->legacy_pba && intr_mode == VNIC_DEV_INTR_MODE_INTX) { + shost_printk(KERN_ERR, fnic->lport->host, + "Failed to hook legacy pba resource\n"); + err = -ENODEV; + goto err_out_cleanup; + } + + /* + * Init RQ/WQ resources. + * + * RQ[0 to n-1] point to CQ[0 to n-1] + * WQ[0 to m-1] point to CQ[n to n+m-1] + * WQ_COPY[0 to k-1] points to CQ[n+m to n+m+k-1] + * + * Note for copy wq we always initialize with cq_index = 0 + * + * Error interrupt is not enabled for MSI. + */ + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSIX: + error_interrupt_enable = 1; + error_interrupt_offset = fnic->err_intr_offset; + break; + default: + error_interrupt_enable = 0; + error_interrupt_offset = 0; + break; + } + + for (i = 0; i < fnic->rq_count; i++) { + cq_index = i; + vnic_rq_init(&fnic->rq[i], + cq_index, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->raw_wq_count; i++) { + cq_index = i + fnic->rq_count; + vnic_wq_init(&fnic->wq[i], + cq_index, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->wq_copy_count; i++) { + vnic_wq_copy_init(&fnic->wq_copy[i], + 0 /* cq_index 0 - always */, + error_interrupt_enable, + error_interrupt_offset); + } + + for (i = 0; i < fnic->cq_count; i++) { + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_MSIX: + interrupt_offset = i; + break; + default: + interrupt_offset = 0; + break; + } + + vnic_cq_init(&fnic->cq[i], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 1 /* interrupt_enable */, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + interrupt_offset, + 0 /* cq_message_addr */); + } + + /* + * Init INTR resources + * + * mask_on_assertion is not used for INTx due to the level- + * triggered nature of INTx + */ + + switch (intr_mode) { + case VNIC_DEV_INTR_MODE_MSI: + case VNIC_DEV_INTR_MODE_MSIX: + mask_on_assertion = 1; + break; + default: + mask_on_assertion = 0; + break; + } + + for (i = 0; i < fnic->intr_count; i++) { + vnic_intr_init(&fnic->intr[i], + fnic->config.intr_timer, + fnic->config.intr_timer_type, + mask_on_assertion); + } + + /* init the stats memory by making the first call here */ + err = vnic_dev_stats_dump(fnic->vdev, &fnic->stats); + if (err) { + shost_printk(KERN_ERR, fnic->lport->host, + "vnic_dev_stats_dump failed - x%x\n", err); + goto err_out_cleanup; + } + + /* Clear LIF stats */ + vnic_dev_stats_clear(fnic->vdev); + + return 0; + +err_out_cleanup: + fnic_free_vnic_resources(fnic); + + return err; +} diff --git a/drivers/scsi/fnic/fnic_res.h b/drivers/scsi/fnic/fnic_res.h new file mode 100644 index 000000000000..b6f310262534 --- /dev/null +++ b/drivers/scsi/fnic/fnic_res.h @@ -0,0 +1,197 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _FNIC_RES_H_ +#define _FNIC_RES_H_ + +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "fnic_io.h" +#include "fcpio.h" +#include "vnic_wq_copy.h" +#include "vnic_cq_copy.h" + +static inline void fnic_queue_wq_desc(struct vnic_wq *wq, + void *os_buf, dma_addr_t dma_addr, + unsigned int len, unsigned int fc_eof, + int vlan_tag_insert, + unsigned int vlan_tag, + int cq_entry, int sop, int eop) +{ + struct wq_enet_desc *desc = vnic_wq_next_desc(wq); + + wq_enet_desc_enc(desc, + (u64)dma_addr | VNIC_PADDR_TARGET, + (u16)len, + 0, /* mss_or_csum_offset */ + (u16)fc_eof, + 0, /* offload_mode */ + (u8)eop, (u8)cq_entry, + 1, /* fcoe_encap */ + (u8)vlan_tag_insert, + (u16)vlan_tag, + 0 /* loopback */); + + vnic_wq_post(wq, os_buf, dma_addr, len, sop, eop); +} + +static inline void fnic_queue_wq_copy_desc_icmnd_16(struct vnic_wq_copy *wq, + u32 req_id, + u32 lunmap_id, u8 spl_flags, + u32 sgl_cnt, u32 sense_len, + u64 sgl_addr, u64 sns_addr, + u8 crn, u8 pri_ta, + u8 flags, u8 *scsi_cdb, + u32 data_len, u8 *lun, + u32 d_id, u16 mss, + u32 ratov, u32 edtov) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_ICMND_16; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.icmnd_16.lunmap_id = lunmap_id; /* index into lunmap table */ + desc->u.icmnd_16.special_req_flags = spl_flags; /* exch req flags */ + desc->u.icmnd_16._resvd0[0] = 0; /* reserved */ + desc->u.icmnd_16._resvd0[1] = 0; /* reserved */ + desc->u.icmnd_16._resvd0[2] = 0; /* reserved */ + desc->u.icmnd_16.sgl_cnt = sgl_cnt; /* scatter-gather list count */ + desc->u.icmnd_16.sense_len = sense_len; /* sense buffer length */ + desc->u.icmnd_16.sgl_addr = sgl_addr; /* scatter-gather list addr */ + desc->u.icmnd_16.sense_addr = sns_addr; /* sense buffer address */ + desc->u.icmnd_16.crn = crn; /* SCSI Command Reference No.*/ + desc->u.icmnd_16.pri_ta = pri_ta; /* SCSI Pri & Task attribute */ + desc->u.icmnd_16._resvd1 = 0; /* reserved: should be 0 */ + desc->u.icmnd_16.flags = flags; /* command flags */ + memcpy(desc->u.icmnd_16.scsi_cdb, scsi_cdb, CDB_16); /* SCSI CDB */ + desc->u.icmnd_16.data_len = data_len; /* length of data expected */ + memcpy(desc->u.icmnd_16.lun, lun, LUN_ADDRESS); /* LUN address */ + desc->u.icmnd_16._resvd2 = 0; /* reserved */ + hton24(desc->u.icmnd_16.d_id, d_id); /* FC vNIC only: Target D_ID */ + desc->u.icmnd_16.mss = mss; /* FC vNIC only: max burst */ + desc->u.icmnd_16.r_a_tov = ratov; /*FC vNIC only: Res. Alloc Timeout */ + desc->u.icmnd_16.e_d_tov = edtov; /*FC vNIC only: Err Detect Timeout */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_itmf(struct vnic_wq_copy *wq, + u32 req_id, u32 lunmap_id, + u32 tm_req, u32 tm_id, u8 *lun, + u32 d_id, u32 r_a_tov, + u32 e_d_tov) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_ITMF; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.itmf.lunmap_id = lunmap_id; /* index into lunmap table */ + desc->u.itmf.tm_req = tm_req; /* SCSI Task Management request */ + desc->u.itmf.t_tag = tm_id; /* tag of fcpio to be aborted */ + desc->u.itmf._resvd = 0; + memcpy(desc->u.itmf.lun, lun, LUN_ADDRESS); /* LUN address */ + desc->u.itmf._resvd1 = 0; + hton24(desc->u.itmf.d_id, d_id); /* FC vNIC only: Target D_ID */ + desc->u.itmf.r_a_tov = r_a_tov; /* FC vNIC only: R_A_TOV in msec */ + desc->u.itmf.e_d_tov = e_d_tov; /* FC vNIC only: E_D_TOV in msec */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_flogi_reg(struct vnic_wq_copy *wq, + u32 req_id, u8 format, + u32 s_id, u8 *gw_mac) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_FLOGI_REG; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.flogi_reg.format = format; + hton24(desc->u.flogi_reg.s_id, s_id); + memcpy(desc->u.flogi_reg.gateway_mac, gw_mac, ETH_ALEN); + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_fw_reset(struct vnic_wq_copy *wq, + u32 req_id) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_RESET; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_wq_copy_desc_lunmap(struct vnic_wq_copy *wq, + u32 req_id, u64 lunmap_addr, + u32 lunmap_len) +{ + struct fcpio_host_req *desc = vnic_wq_copy_next_desc(wq); + + desc->hdr.type = FCPIO_LUNMAP_REQ; /* enum fcpio_type */ + desc->hdr.status = 0; /* header status entry */ + desc->hdr._resvd = 0; /* reserved */ + desc->hdr.tag.u.req_id = req_id; /* id for this request */ + + desc->u.lunmap_req.addr = lunmap_addr; /* address of the buffer */ + desc->u.lunmap_req.len = lunmap_len; /* len of the buffer */ + + vnic_wq_copy_post(wq); +} + +static inline void fnic_queue_rq_desc(struct vnic_rq *rq, + void *os_buf, dma_addr_t dma_addr, + u16 len) +{ + struct rq_enet_desc *desc = vnic_rq_next_desc(rq); + + rq_enet_desc_enc(desc, + (u64)dma_addr | VNIC_PADDR_TARGET, + RQ_ENET_TYPE_ONLY_SOP, + (u16)len); + + vnic_rq_post(rq, os_buf, 0, dma_addr, len); +} + + +struct fnic; + +int fnic_get_vnic_config(struct fnic *); +int fnic_alloc_vnic_resources(struct fnic *); +void fnic_free_vnic_resources(struct fnic *); +void fnic_get_res_counts(struct fnic *); +int fnic_set_nic_config(struct fnic *fnic, u8 rss_default_cpu, + u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, + u8 rss_enable, u8 tso_ipid_split_en, + u8 ig_vlan_strip_en); + +#endif /* _FNIC_RES_H_ */ diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c new file mode 100644 index 000000000000..eabf36502856 --- /dev/null +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -0,0 +1,1850 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/mempool.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/workqueue.h> +#include <linux/pci.h> +#include <linux/scatterlist.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/delay.h> +#include <scsi/scsi.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_tcq.h> +#include <scsi/fc/fc_els.h> +#include <scsi/fc/fc_fcoe.h> +#include <scsi/libfc.h> +#include <scsi/fc_frame.h> +#include "fnic_io.h" +#include "fnic.h" + +const char *fnic_state_str[] = { + [FNIC_IN_FC_MODE] = "FNIC_IN_FC_MODE", + [FNIC_IN_FC_TRANS_ETH_MODE] = "FNIC_IN_FC_TRANS_ETH_MODE", + [FNIC_IN_ETH_MODE] = "FNIC_IN_ETH_MODE", + [FNIC_IN_ETH_TRANS_FC_MODE] = "FNIC_IN_ETH_TRANS_FC_MODE", +}; + +static const char *fnic_ioreq_state_str[] = { + [FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING", + [FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING", + [FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE", + [FNIC_IOREQ_CMD_COMPLETE] = "FNIC_IOREQ_CMD_COMPLETE", +}; + +static const char *fcpio_status_str[] = { + [FCPIO_SUCCESS] = "FCPIO_SUCCESS", /*0x0*/ + [FCPIO_INVALID_HEADER] = "FCPIO_INVALID_HEADER", + [FCPIO_OUT_OF_RESOURCE] = "FCPIO_OUT_OF_RESOURCE", + [FCPIO_INVALID_PARAM] = "FCPIO_INVALID_PARAM]", + [FCPIO_REQ_NOT_SUPPORTED] = "FCPIO_REQ_NOT_SUPPORTED", + [FCPIO_IO_NOT_FOUND] = "FCPIO_IO_NOT_FOUND", + [FCPIO_ABORTED] = "FCPIO_ABORTED", /*0x41*/ + [FCPIO_TIMEOUT] = "FCPIO_TIMEOUT", + [FCPIO_SGL_INVALID] = "FCPIO_SGL_INVALID", + [FCPIO_MSS_INVALID] = "FCPIO_MSS_INVALID", + [FCPIO_DATA_CNT_MISMATCH] = "FCPIO_DATA_CNT_MISMATCH", + [FCPIO_FW_ERR] = "FCPIO_FW_ERR", + [FCPIO_ITMF_REJECTED] = "FCPIO_ITMF_REJECTED", + [FCPIO_ITMF_FAILED] = "FCPIO_ITMF_FAILED", + [FCPIO_ITMF_INCORRECT_LUN] = "FCPIO_ITMF_INCORRECT_LUN", + [FCPIO_CMND_REJECTED] = "FCPIO_CMND_REJECTED", + [FCPIO_NO_PATH_AVAIL] = "FCPIO_NO_PATH_AVAIL", + [FCPIO_PATH_FAILED] = "FCPIO_PATH_FAILED", + [FCPIO_LUNMAP_CHNG_PEND] = "FCPIO_LUNHMAP_CHNG_PEND", +}; + +const char *fnic_state_to_str(unsigned int state) +{ + if (state >= ARRAY_SIZE(fnic_state_str) || !fnic_state_str[state]) + return "unknown"; + + return fnic_state_str[state]; +} + +static const char *fnic_ioreq_state_to_str(unsigned int state) +{ + if (state >= ARRAY_SIZE(fnic_ioreq_state_str) || + !fnic_ioreq_state_str[state]) + return "unknown"; + + return fnic_ioreq_state_str[state]; +} + +static const char *fnic_fcpio_status_to_str(unsigned int status) +{ + if (status >= ARRAY_SIZE(fcpio_status_str) || !fcpio_status_str[status]) + return "unknown"; + + return fcpio_status_str[status]; +} + +static void fnic_cleanup_io(struct fnic *fnic, int exclude_id); + +static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic, + struct scsi_cmnd *sc) +{ + u32 hash = sc->request->tag & (FNIC_IO_LOCKS - 1); + + return &fnic->io_req_lock[hash]; +} + +/* + * Unmap the data buffer and sense buffer for an io_req, + * also unmap and free the device-private scatter/gather list. + */ +static void fnic_release_ioreq_buf(struct fnic *fnic, + struct fnic_io_req *io_req, + struct scsi_cmnd *sc) +{ + if (io_req->sgl_list_pa) + pci_unmap_single(fnic->pdev, io_req->sgl_list_pa, + sizeof(io_req->sgl_list[0]) * io_req->sgl_cnt, + PCI_DMA_TODEVICE); + scsi_dma_unmap(sc); + + if (io_req->sgl_cnt) + mempool_free(io_req->sgl_list_alloc, + fnic->io_sgl_pool[io_req->sgl_type]); + if (io_req->sense_buf_pa) + pci_unmap_single(fnic->pdev, io_req->sense_buf_pa, + SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE); +} + +/* Free up Copy Wq descriptors. Called with copy_wq lock held */ +static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq) +{ + /* if no Ack received from firmware, then nothing to clean */ + if (!fnic->fw_ack_recd[0]) + return 1; + + /* + * Update desc_available count based on number of freed descriptors + * Account for wraparound + */ + if (wq->to_clean_index <= fnic->fw_ack_index[0]) + wq->ring.desc_avail += (fnic->fw_ack_index[0] + - wq->to_clean_index + 1); + else + wq->ring.desc_avail += (wq->ring.desc_count + - wq->to_clean_index + + fnic->fw_ack_index[0] + 1); + + /* + * just bump clean index to ack_index+1 accounting for wraparound + * this will essentially free up all descriptors between + * to_clean_index and fw_ack_index, both inclusive + */ + wq->to_clean_index = + (fnic->fw_ack_index[0] + 1) % wq->ring.desc_count; + + /* we have processed the acks received so far */ + fnic->fw_ack_recd[0] = 0; + return 0; +} + + +/* + * fnic_fw_reset_handler + * Routine to send reset msg to fw + */ +int fnic_fw_reset_handler(struct fnic *fnic) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) + ret = -EAGAIN; + else + fnic_queue_wq_copy_desc_fw_reset(wq, SCSI_NO_TAG); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + + if (!ret) + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Issued fw reset\n"); + else + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Failed to issue fw reset\n"); + return ret; +} + + +/* + * fnic_flogi_reg_handler + * Routine to send flogi register msg to fw + */ +int fnic_flogi_reg_handler(struct fnic *fnic) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + u8 gw_mac[ETH_ALEN]; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + ret = -EAGAIN; + goto flogi_reg_ioreq_end; + } + + if (fnic->fcoui_mode) + memset(gw_mac, 0xff, ETH_ALEN); + else + memcpy(gw_mac, fnic->dest_addr, ETH_ALEN); + + fnic_queue_wq_copy_desc_flogi_reg(wq, SCSI_NO_TAG, + FCPIO_FLOGI_REG_GW_DEST, + fnic->s_id, + gw_mac); + +flogi_reg_ioreq_end: + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + + if (!ret) + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "flog reg issued\n"); + + return ret; +} + +/* + * fnic_queue_wq_copy_desc + * Routine to enqueue a wq copy desc + */ +static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, + struct vnic_wq_copy *wq, + struct fnic_io_req *io_req, + struct scsi_cmnd *sc, + u32 sg_count) +{ + struct scatterlist *sg; + struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct host_sg_desc *desc; + u8 pri_tag = 0; + unsigned int i; + unsigned long intr_flags; + int flags; + u8 exch_flags; + struct scsi_lun fc_lun; + char msg[2]; + + if (sg_count) { + BUG_ON(sg_count < 0); + BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT); + + /* For each SGE, create a device desc entry */ + desc = io_req->sgl_list; + for_each_sg(scsi_sglist(sc), sg, sg_count, i) { + desc->addr = cpu_to_le64(sg_dma_address(sg)); + desc->len = cpu_to_le32(sg_dma_len(sg)); + desc->_resvd = 0; + desc++; + } + + io_req->sgl_list_pa = pci_map_single + (fnic->pdev, + io_req->sgl_list, + sizeof(io_req->sgl_list[0]) * sg_count, + PCI_DMA_TODEVICE); + } + + io_req->sense_buf_pa = pci_map_single(fnic->pdev, + sc->sense_buffer, + SCSI_SENSE_BUFFERSIZE, + PCI_DMA_FROMDEVICE); + + int_to_scsilun(sc->device->lun, &fc_lun); + + pri_tag = FCPIO_ICMND_PTA_SIMPLE; + msg[0] = MSG_SIMPLE_TAG; + scsi_populate_tag_msg(sc, msg); + if (msg[0] == MSG_ORDERED_TAG) + pri_tag = FCPIO_ICMND_PTA_ORDERED; + + /* Enqueue the descriptor in the Copy WQ */ + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (unlikely(!vnic_wq_copy_desc_avail(wq))) { + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + return SCSI_MLQUEUE_HOST_BUSY; + } + + flags = 0; + if (sc->sc_data_direction == DMA_FROM_DEVICE) + flags = FCPIO_ICMND_RDDATA; + else if (sc->sc_data_direction == DMA_TO_DEVICE) + flags = FCPIO_ICMND_WRDATA; + + exch_flags = 0; + if ((fnic->config.flags & VFCF_FCP_SEQ_LVL_ERR) && + (rp->flags & FC_RP_FLAGS_RETRY)) + exch_flags |= FCPIO_ICMND_SRFLAG_RETRY; + + fnic_queue_wq_copy_desc_icmnd_16(wq, sc->request->tag, + 0, exch_flags, io_req->sgl_cnt, + SCSI_SENSE_BUFFERSIZE, + io_req->sgl_list_pa, + io_req->sense_buf_pa, + 0, /* scsi cmd ref, always 0 */ + pri_tag, /* scsi pri and tag */ + flags, /* command flags */ + sc->cmnd, scsi_bufflen(sc), + fc_lun.scsi_lun, io_req->port_id, + rport->maxframe_size, rp->r_a_tov, + rp->e_d_tov); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + return 0; +} + +/* + * fnic_queuecommand + * Routine to send a scsi cdb + * Called with host_lock held and interrupts disabled. + */ +int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) +{ + struct fc_lport *lp; + struct fc_rport *rport; + struct fnic_io_req *io_req; + struct fnic *fnic; + struct vnic_wq_copy *wq; + int ret; + u32 sg_count; + unsigned long flags; + unsigned long ptr; + + rport = starget_to_rport(scsi_target(sc->device)); + ret = fc_remote_port_chkready(rport); + if (ret) { + sc->result = ret; + done(sc); + return 0; + } + + lp = shost_priv(sc->device->host); + if (lp->state != LPORT_ST_READY || !(lp->link_up)) + return SCSI_MLQUEUE_HOST_BUSY; + + /* + * Release host lock, use driver resource specific locks from here. + * Don't re-enable interrupts in case they were disabled prior to the + * caller disabling them. + */ + spin_unlock(lp->host->host_lock); + + /* Get a new io_req for this SCSI IO */ + fnic = lport_priv(lp); + + io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); + if (!io_req) { + ret = SCSI_MLQUEUE_HOST_BUSY; + goto out; + } + memset(io_req, 0, sizeof(*io_req)); + + /* Map the data buffer */ + sg_count = scsi_dma_map(sc); + if (sg_count < 0) { + mempool_free(io_req, fnic->io_req_pool); + goto out; + } + + /* Determine the type of scatter/gather list we need */ + io_req->sgl_cnt = sg_count; + io_req->sgl_type = FNIC_SGL_CACHE_DFLT; + if (sg_count > FNIC_DFLT_SG_DESC_CNT) + io_req->sgl_type = FNIC_SGL_CACHE_MAX; + + if (sg_count) { + io_req->sgl_list = + mempool_alloc(fnic->io_sgl_pool[io_req->sgl_type], + GFP_ATOMIC | GFP_DMA); + if (!io_req->sgl_list) { + ret = SCSI_MLQUEUE_HOST_BUSY; + scsi_dma_unmap(sc); + mempool_free(io_req, fnic->io_req_pool); + goto out; + } + + /* Cache sgl list allocated address before alignment */ + io_req->sgl_list_alloc = io_req->sgl_list; + ptr = (unsigned long) io_req->sgl_list; + if (ptr % FNIC_SG_DESC_ALIGN) { + io_req->sgl_list = (struct host_sg_desc *) + (((unsigned long) ptr + + FNIC_SG_DESC_ALIGN - 1) + & ~(FNIC_SG_DESC_ALIGN - 1)); + } + } + + /* initialize rest of io_req */ + io_req->port_id = rport->port_id; + CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; + CMD_SP(sc) = (char *)io_req; + sc->scsi_done = done; + + /* create copy wq desc and enqueue it */ + wq = &fnic->wq_copy[0]; + ret = fnic_queue_wq_copy_desc(fnic, wq, io_req, sc, sg_count); + if (ret) { + /* + * In case another thread cancelled the request, + * refetch the pointer under the lock. + */ + spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + CMD_SP(sc) = NULL; + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + spin_unlock_irqrestore(io_lock, flags); + if (io_req) { + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + } +out: + /* acquire host lock before returning to SCSI */ + spin_lock(lp->host->host_lock); + return ret; +} + +/* + * fnic_fcpio_fw_reset_cmpl_handler + * Routine to handle fw reset completion + */ +static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + int ret = 0; + struct fc_frame *flogi; + unsigned long flags; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + + /* Clean up all outstanding io requests */ + fnic_cleanup_io(fnic, SCSI_NO_TAG); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + + flogi = fnic->flogi; + fnic->flogi = NULL; + + /* fnic should be in FC_TRANS_ETH_MODE */ + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) { + /* Check status of reset completion */ + if (!hdr_status) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "reset cmpl success\n"); + /* Ready to send flogi out */ + fnic->state = FNIC_IN_ETH_MODE; + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic fw_reset : failed %s\n", + fnic_fcpio_status_to_str(hdr_status)); + + /* + * Unable to change to eth mode, cannot send out flogi + * Change state to fc mode, so that subsequent Flogi + * requests from libFC will cause more attempts to + * reset the firmware. Free the cached flogi + */ + fnic->state = FNIC_IN_FC_MODE; + ret = -1; + } + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Unexpected state %s while processing" + " reset cmpl\n", fnic_state_to_str(fnic->state)); + ret = -1; + } + + /* Thread removing device blocks till firmware reset is complete */ + if (fnic->remove_wait) + complete(fnic->remove_wait); + + /* + * If fnic is being removed, or fw reset failed + * free the flogi frame. Else, send it out + */ + if (fnic->remove_wait || ret) { + fnic->flogi_oxid = FC_XID_UNKNOWN; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (flogi) + dev_kfree_skb_irq(fp_skb(flogi)); + goto reset_cmpl_handler_end; + } + + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (flogi) + ret = fnic_send_frame(fnic, flogi); + + reset_cmpl_handler_end: + return ret; +} + +/* + * fnic_fcpio_flogi_reg_cmpl_handler + * Routine to handle flogi register completion + */ +static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + int ret = 0; + struct fc_frame *flogi_resp = NULL; + unsigned long flags; + struct sk_buff *skb; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + + /* Update fnic state based on status of flogi reg completion */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + + flogi_resp = fnic->flogi_resp; + fnic->flogi_resp = NULL; + + if (fnic->state == FNIC_IN_ETH_TRANS_FC_MODE) { + + /* Check flogi registration completion status */ + if (!hdr_status) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "flog reg succeeded\n"); + fnic->state = FNIC_IN_FC_MODE; + } else { + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic flogi reg :failed %s\n", + fnic_fcpio_status_to_str(hdr_status)); + fnic->state = FNIC_IN_ETH_MODE; + ret = -1; + } + } else { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Unexpected fnic state %s while" + " processing flogi reg completion\n", + fnic_state_to_str(fnic->state)); + ret = -1; + } + + /* Successful flogi reg cmpl, pass frame to LibFC */ + if (!ret && flogi_resp) { + if (fnic->stop_rx_link_events) { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + goto reg_cmpl_handler_end; + } + skb = (struct sk_buff *)flogi_resp; + /* Use fr_flags to indicate whether flogi resp or not */ + fr_flags(flogi_resp) = 1; + fr_dev(flogi_resp) = fnic->lport; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + skb_queue_tail(&fnic->frame_queue, skb); + queue_work(fnic_event_queue, &fnic->frame_work); + + } else { + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + if (flogi_resp) + dev_kfree_skb_irq(fp_skb(flogi_resp)); + } + +reg_cmpl_handler_end: + return ret; +} + +static inline int is_ack_index_in_range(struct vnic_wq_copy *wq, + u16 request_out) +{ + if (wq->to_clean_index <= wq->to_use_index) { + /* out of range, stale request_out index */ + if (request_out < wq->to_clean_index || + request_out >= wq->to_use_index) + return 0; + } else { + /* out of range, stale request_out index */ + if (request_out < wq->to_clean_index && + request_out >= wq->to_use_index) + return 0; + } + /* request_out index is in range */ + return 1; +} + + +/* + * Mark that ack received and store the Ack index. If there are multiple + * acks received before Tx thread cleans it up, the latest value will be + * used which is correct behavior. This state should be in the copy Wq + * instead of in the fnic + */ +static inline void fnic_fcpio_ack_handler(struct fnic *fnic, + unsigned int cq_index, + struct fcpio_fw_req *desc) +{ + struct vnic_wq_copy *wq; + u16 request_out = desc->u.ack.request_out; + unsigned long flags; + + /* mark the ack state */ + wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count]; + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (is_ack_index_in_range(wq, request_out)) { + fnic->fw_ack_index[0] = request_out; + fnic->fw_ack_recd[0] = 1; + } + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); +} + +/* + * fnic_fcpio_icmnd_cmpl_handler + * Routine to handle icmnd completions + */ +static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + u32 id; + u64 xfer_len = 0; + struct fcpio_icmnd_cmpl *icmnd_cmpl; + struct fnic_io_req *io_req; + struct scsi_cmnd *sc; + unsigned long flags; + spinlock_t *io_lock; + + /* Decode the cmpl description to get the io_req id */ + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + fcpio_tag_id_dec(&tag, &id); + + if (id >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id); + WARN_ON_ONCE(!sc); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + WARN_ON_ONCE(!io_req); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + /* firmware completed the io */ + io_req->io_completed = 1; + + /* + * if SCSI-ML has already issued abort on this command, + * ignore completion of the IO. The abts path will clean it up + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + /* Mark the IO as complete */ + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + + icmnd_cmpl = &desc->u.icmnd_cmpl; + + switch (hdr_status) { + case FCPIO_SUCCESS: + sc->result = (DID_OK << 16) | icmnd_cmpl->scsi_status; + xfer_len = scsi_bufflen(sc); + scsi_set_resid(sc, icmnd_cmpl->residual); + + if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER) + xfer_len -= icmnd_cmpl->residual; + + /* + * If queue_full, then try to reduce queue depth for all + * LUNS on the target. Todo: this should be accompanied + * by a periodic queue_depth rampup based on successful + * IO completion. + */ + if (icmnd_cmpl->scsi_status == QUEUE_FULL) { + struct scsi_device *t_sdev; + int qd = 0; + + shost_for_each_device(t_sdev, sc->device->host) { + if (t_sdev->id != sc->device->id) + continue; + + if (t_sdev->queue_depth > 1) { + qd = scsi_track_queue_full + (t_sdev, + t_sdev->queue_depth - 1); + if (qd == -1) + qd = t_sdev->host->cmd_per_lun; + shost_printk(KERN_INFO, + fnic->lport->host, + "scsi[%d:%d:%d:%d" + "] queue full detected," + "new depth = %d\n", + t_sdev->host->host_no, + t_sdev->channel, + t_sdev->id, t_sdev->lun, + t_sdev->queue_depth); + } + } + } + break; + + case FCPIO_TIMEOUT: /* request was timed out */ + sc->result = (DID_TIME_OUT << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_ABORTED: /* request was aborted */ + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_DATA_CNT_MISMATCH: /* recv/sent more/less data than exp. */ + scsi_set_resid(sc, icmnd_cmpl->residual); + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + + case FCPIO_OUT_OF_RESOURCE: /* out of resources to complete request */ + sc->result = (DID_REQUEUE << 16) | icmnd_cmpl->scsi_status; + break; + case FCPIO_INVALID_HEADER: /* header contains invalid data */ + case FCPIO_INVALID_PARAM: /* some parameter in request invalid */ + case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */ + case FCPIO_IO_NOT_FOUND: /* requested I/O was not found */ + case FCPIO_SGL_INVALID: /* request was aborted due to sgl error */ + case FCPIO_MSS_INVALID: /* request was aborted due to mss error */ + case FCPIO_FW_ERR: /* request was terminated due fw error */ + default: + shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n", + fnic_fcpio_status_to_str(hdr_status)); + sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status; + break; + } + + /* Break link with the SCSI command */ + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + + mempool_free(io_req, fnic->io_req_pool); + + if (sc->sc_data_direction == DMA_FROM_DEVICE) { + fnic->lport->host_stats.fcp_input_requests++; + fnic->fcp_input_bytes += xfer_len; + } else if (sc->sc_data_direction == DMA_TO_DEVICE) { + fnic->lport->host_stats.fcp_output_requests++; + fnic->fcp_output_bytes += xfer_len; + } else + fnic->lport->host_stats.fcp_control_requests++; + + /* Call SCSI completion function to complete the IO */ + if (sc->scsi_done) + sc->scsi_done(sc); + +} + +/* fnic_fcpio_itmf_cmpl_handler + * Routine to handle itmf completions + */ +static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, + struct fcpio_fw_req *desc) +{ + u8 type; + u8 hdr_status; + struct fcpio_tag tag; + u32 id; + struct scsi_cmnd *sc; + struct fnic_io_req *io_req; + unsigned long flags; + spinlock_t *io_lock; + + fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); + fcpio_tag_id_dec(&tag, &id); + + if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); + WARN_ON_ONCE(!sc); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + WARN_ON_ONCE(!io_req); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return; + } + + if (id & FNIC_TAG_ABORT) { + /* Completion of abort cmd */ + if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) { + /* This is a late completion. Ignore it */ + spin_unlock_irqrestore(io_lock, flags); + return; + } + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + CMD_ABTS_STATUS(sc) = hdr_status; + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "abts cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + + /* + * If scsi_eh thread is blocked waiting for abts to complete, + * signal completion to it. IO will be cleaned in the thread + * else clean it in this context + */ + if (io_req->abts_done) { + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + } else { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "abts cmpl, completing IO\n"); + CMD_SP(sc) = NULL; + sc->result = (DID_ERROR << 16); + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + if (sc->scsi_done) + sc->scsi_done(sc); + } + + } else if (id & FNIC_TAG_DEV_RST) { + /* Completion of device reset */ + CMD_LR_STATUS(sc) = hdr_status; + CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + if (io_req->dr_done) + complete(io_req->dr_done); + spin_unlock_irqrestore(io_lock, flags); + + } else { + shost_printk(KERN_ERR, fnic->lport->host, + "Unexpected itmf io state %s tag %x\n", + fnic_ioreq_state_to_str(CMD_STATE(sc)), id); + spin_unlock_irqrestore(io_lock, flags); + } + +} + +/* + * fnic_fcpio_cmpl_handler + * Routine to service the cq for wq_copy + */ +static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, + unsigned int cq_index, + struct fcpio_fw_req *desc) +{ + struct fnic *fnic = vnic_dev_priv(vdev); + int ret = 0; + + switch (desc->hdr.type) { + case FCPIO_ACK: /* fw copied copy wq desc to its queue */ + fnic_fcpio_ack_handler(fnic, cq_index, desc); + break; + + case FCPIO_ICMND_CMPL: /* fw completed a command */ + fnic_fcpio_icmnd_cmpl_handler(fnic, desc); + break; + + case FCPIO_ITMF_CMPL: /* fw completed itmf (abort cmd, lun reset)*/ + fnic_fcpio_itmf_cmpl_handler(fnic, desc); + break; + + case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */ + ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); + break; + + case FCPIO_RESET_CMPL: /* fw completed reset */ + ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); + break; + + default: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "firmware completion type %d\n", + desc->hdr.type); + break; + } + + return ret; +} + +/* + * fnic_wq_copy_cmpl_handler + * Routine to process wq copy + */ +int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do) +{ + unsigned int wq_work_done = 0; + unsigned int i, cq_index; + unsigned int cur_work_done; + + for (i = 0; i < fnic->wq_copy_count; i++) { + cq_index = i + fnic->raw_wq_count + fnic->rq_count; + cur_work_done = vnic_cq_copy_service(&fnic->cq[cq_index], + fnic_fcpio_cmpl_handler, + copy_work_to_do); + wq_work_done += cur_work_done; + } + return wq_work_done; +} + +static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) +{ + unsigned int i; + struct fnic_io_req *io_req; + unsigned long flags = 0; + struct scsi_cmnd *sc; + spinlock_t *io_lock; + + for (i = 0; i < FNIC_MAX_IO_REQ; i++) { + if (i == exclude_id) + continue; + + sc = scsi_host_find_tag(fnic->lport->host, i); + if (!sc) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto cleanup_scsi_cmd; + } + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + /* + * If there is a scsi_cmnd associated with this io_req, then + * free the corresponding state + */ + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +cleanup_scsi_cmd: + sc->result = DID_TRANSPORT_DISRUPTED << 16; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_cleanup_io:" + " DID_TRANSPORT_DISRUPTED\n"); + + /* Complete the command to SCSI */ + if (sc->scsi_done) + sc->scsi_done(sc); + } +} + +void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, + struct fcpio_host_req *desc) +{ + u32 id; + struct fnic *fnic = vnic_dev_priv(wq->vdev); + struct fnic_io_req *io_req; + struct scsi_cmnd *sc; + unsigned long flags; + spinlock_t *io_lock; + + /* get the tag reference */ + fcpio_tag_id_dec(&desc->hdr.tag, &id); + id &= FNIC_TAG_MASK; + + if (id >= FNIC_MAX_IO_REQ) + return; + + sc = scsi_host_find_tag(fnic->lport->host, id); + if (!sc) + return; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + /* Get the IO context which this desc refers to */ + io_req = (struct fnic_io_req *)CMD_SP(sc); + + /* fnic interrupts are turned off by now */ + + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto wq_copy_cleanup_scsi_cmd; + } + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +wq_copy_cleanup_scsi_cmd: + sc->result = DID_NO_CONNECT << 16; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:" + " DID_NO_CONNECT\n"); + + if (sc->scsi_done) + sc->scsi_done(sc); +} + +static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, + u32 task_req, u8 *fc_lun, + struct fnic_io_req *io_req) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + unsigned long flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + return 1; + } + fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT, + 0, task_req, tag, fc_lun, io_req->port_id, + fnic->config.ra_tov, fnic->config.ed_tov); + + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + return 0; +} + +void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + struct scsi_cmnd *sc; + struct scsi_lun fc_lun; + enum fnic_ioreq_state old_ioreq_state; + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic_rport_reset_exch called portid 0x%06x\n", + port_id); + + if (fnic->in_remove) + return; + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + if (!sc) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || io_req->port_id != port_id) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to rport that went away + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + BUG_ON(io_req->abts_done); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_reset_exch: Issuing abts\n"); + + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + /* + * Revert the cmd state back to old state, if + * it hasnt changed in between. This cmd will get + * aborted later by scsi_eh, or cleaned up during + * lun reset + */ + io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; + spin_unlock_irqrestore(io_lock, flags); + } + } + +} + +void fnic_terminate_rport_io(struct fc_rport *rport) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + struct scsi_cmnd *sc; + struct scsi_lun fc_lun; + struct fc_rport_libfc_priv *rdata = rport->dd_data; + struct fc_lport *lport = rdata->local_port; + struct fnic *fnic = lport_priv(lport); + struct fc_rport *cmd_rport; + enum fnic_ioreq_state old_ioreq_state; + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, "fnic_terminate_rport_io called" + " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n", + rport->port_name, rport->node_name, + rport->port_id); + + if (fnic->in_remove) + return; + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + if (!sc) + continue; + + cmd_rport = starget_to_rport(scsi_target(sc->device)); + if (rport != cmd_rport) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || rport != cmd_rport) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to rport that went away + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + BUG_ON(io_req->abts_done); + + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic_terminate_rport_io: Issuing abts\n"); + + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + /* + * Revert the cmd state back to old state, if + * it hasnt changed in between. This cmd will get + * aborted later by scsi_eh, or cleaned up during + * lun reset + */ + io_lock = fnic_io_lock_hash(fnic, sc); + + spin_lock_irqsave(io_lock, flags); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; + spin_unlock_irqrestore(io_lock, flags); + } + } + +} + +static void fnic_block_error_handler(struct scsi_cmnd *sc) +{ + struct Scsi_Host *shost = sc->device->host; + struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + while (rport->port_state == FC_PORTSTATE_BLOCKED) { + spin_unlock_irqrestore(shost->host_lock, flags); + msleep(1000); + spin_lock_irqsave(shost->host_lock, flags); + } + spin_unlock_irqrestore(shost->host_lock, flags); + +} + +/* + * This function is exported to SCSI for sending abort cmnds. + * A SCSI IO is represented by a io_req in the driver. + * The ioreq is linked to the SCSI Cmd, thus a link with the ULP's IO. + */ +int fnic_abort_cmd(struct scsi_cmnd *sc) +{ + struct fc_lport *lp; + struct fnic *fnic; + struct fnic_io_req *io_req; + struct fc_rport *rport; + spinlock_t *io_lock; + unsigned long flags; + int ret = SUCCESS; + u32 task_req; + struct scsi_lun fc_lun; + DECLARE_COMPLETION_ONSTACK(tm_done); + + /* Wait for rport to unblock */ + fnic_block_error_handler(sc); + + /* Get local-port, check ready and link up */ + lp = shost_priv(sc->device->host); + + fnic = lport_priv(lp); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n", + (starget_to_rport(scsi_target(sc->device)))->port_id, + sc->device->lun, sc->request->tag); + + if (lp->state != LPORT_ST_READY || !(lp->link_up)) { + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * Avoid a race between SCSI issuing the abort and the device + * completing the command. + * + * If the command is already completed by the fw cmpl code, + * we just return SUCCESS from here. This means that the abort + * succeeded. In the SCSI ML, since the timeout for command has + * happened, the completion wont actually complete the command + * and it will be considered as an aborted command + * + * The CMD_SP will not be cleared except while holding io_req_lock. + */ + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_abort_cmd_end; + } + + io_req->abts_done = &tm_done; + + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + goto wait_pending; + } + /* + * Command is still pending, need to abort it + * If the firmware completes the command after this point, + * the completion wont be done till mid-layer, since abort + * has already started. + */ + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + + spin_unlock_irqrestore(io_lock, flags); + + /* + * Check readiness of the remote port. If the path to remote + * port is up, then send abts to the remote port to terminate + * the IO. Else, just locally terminate the IO in the firmware + */ + rport = starget_to_rport(scsi_target(sc->device)); + if (fc_remote_port_chkready(rport) == 0) + task_req = FCPIO_ITMF_ABT_TASK; + else + task_req = FCPIO_ITMF_ABT_TASK_TERM; + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, sc->request->tag, task_req, + fc_lun.scsi_lun, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->abts_done = NULL; + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * We queued an abort IO, wait for its completion. + * Once the firmware completes the abort command, it will + * wake up this thread. + */ + wait_pending: + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies + (2 * fnic->config.ra_tov + + fnic->config.ed_tov)); + + /* Check the abort status */ + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + io_req->abts_done = NULL; + + /* fw did not complete abort, timed out */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = FAILED; + goto fnic_abort_cmd_end; + } + + /* + * firmware completed the abort, check the status, + * free the io_req irrespective of failure or success + */ + if (CMD_ABTS_STATUS(sc) != FCPIO_SUCCESS) + ret = FAILED; + + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + +fnic_abort_cmd_end: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from abort cmd %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + return ret; +} + +static inline int fnic_queue_dr_io_req(struct fnic *fnic, + struct scsi_cmnd *sc, + struct fnic_io_req *io_req) +{ + struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct scsi_lun fc_lun; + int ret = 0; + unsigned long intr_flags; + + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); + + if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) + free_wq_copy_descs(fnic, wq); + + if (!vnic_wq_copy_desc_avail(wq)) { + ret = -EAGAIN; + goto lr_io_req_end; + } + + /* fill in the lun info */ + int_to_scsilun(sc->device->lun, &fc_lun); + + fnic_queue_wq_copy_desc_itmf(wq, sc->request->tag | FNIC_TAG_DEV_RST, + 0, FCPIO_ITMF_LUN_RESET, SCSI_NO_TAG, + fc_lun.scsi_lun, io_req->port_id, + fnic->config.ra_tov, fnic->config.ed_tov); + +lr_io_req_end: + spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + + return ret; +} + +/* + * Clean up any pending aborts on the lun + * For each outstanding IO on this lun, whose abort is not completed by fw, + * issue a local abort. Wait for abort to complete. Return 0 if all commands + * successfully aborted, 1 otherwise + */ +static int fnic_clean_pending_aborts(struct fnic *fnic, + struct scsi_cmnd *lr_sc) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + int ret = 0; + struct scsi_cmnd *sc; + struct fc_rport *rport; + struct scsi_lun fc_lun; + struct scsi_device *lun_dev = lr_sc->device; + DECLARE_COMPLETION_ONSTACK(tm_done); + + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + /* + * ignore this lun reset cmd or cmds that do not belong to + * this lun + */ + if (!sc || sc == lr_sc || sc->device != lun_dev) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || sc->device != lun_dev) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + + BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING); + + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + io_req->abts_done = &tm_done; + spin_unlock_irqrestore(io_lock, flags); + + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + rport = starget_to_rport(scsi_target(sc->device)); + + if (fnic_queue_abort_io_req(fnic, tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->abts_done = NULL; + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies + (fnic->config.ed_tov)); + + /* Recheck cmd state to check if it is now aborted */ + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + + io_req->abts_done = NULL; + + /* if abort is still pending with fw, fail */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + goto clean_pending_aborts_end; + } + CMD_SP(sc) = NULL; + spin_unlock_irqrestore(io_lock, flags); + + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + +clean_pending_aborts_end: + return ret; +} + +/* + * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN + * fail to get aborted. It calls driver's eh_device_reset with a SCSI command + * on the LUN. + */ +int fnic_device_reset(struct scsi_cmnd *sc) +{ + struct fc_lport *lp; + struct fnic *fnic; + struct fnic_io_req *io_req; + struct fc_rport *rport; + int status; + int ret = FAILED; + spinlock_t *io_lock; + unsigned long flags; + DECLARE_COMPLETION_ONSTACK(tm_done); + + /* Wait for rport to unblock */ + fnic_block_error_handler(sc); + + /* Get local-port, check ready and link up */ + lp = shost_priv(sc->device->host); + + fnic = lport_priv(lp); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Device reset called FCID 0x%x, LUN 0x%x\n", + (starget_to_rport(scsi_target(sc->device)))->port_id, + sc->device->lun); + + + if (lp->state != LPORT_ST_READY || !(lp->link_up)) + goto fnic_device_reset_end; + + /* Check if remote port up */ + rport = starget_to_rport(scsi_target(sc->device)); + if (fc_remote_port_chkready(rport)) + goto fnic_device_reset_end; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + + /* + * If there is a io_req attached to this command, then use it, + * else allocate a new one. + */ + if (!io_req) { + io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_device_reset_end; + } + memset(io_req, 0, sizeof(*io_req)); + io_req->port_id = rport->port_id; + CMD_SP(sc) = (char *)io_req; + } + io_req->dr_done = &tm_done; + CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; + CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE; + spin_unlock_irqrestore(io_lock, flags); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n", + sc->request->tag); + + /* + * issue the device reset, if enqueue failed, clean up the ioreq + * and break assoc with scsi cmd + */ + if (fnic_queue_dr_io_req(fnic, sc, io_req)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + io_req->dr_done = NULL; + goto fnic_device_reset_clean; + } + + /* + * Wait on the local completion for LUN reset. The io_req may be + * freed while we wait since we hold no lock. + */ + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT)); + + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto fnic_device_reset_end; + } + io_req->dr_done = NULL; + + status = CMD_LR_STATUS(sc); + spin_unlock_irqrestore(io_lock, flags); + + /* + * If lun reset not completed, bail out with failed. io_req + * gets cleaned up during higher levels of EH + */ + if (status == FCPIO_INVALID_CODE) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Device reset timed out\n"); + goto fnic_device_reset_end; + } + + /* Completed, but not successful, clean up the io_req, return fail */ + if (status != FCPIO_SUCCESS) { + spin_lock_irqsave(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Device reset completed - failed\n"); + io_req = (struct fnic_io_req *)CMD_SP(sc); + goto fnic_device_reset_clean; + } + + /* + * Clean up any aborts on this lun that have still not + * completed. If any of these fail, then LUN reset fails. + * clean_pending_aborts cleans all cmds on this lun except + * the lun reset cmd. If all cmds get cleaned, the lun reset + * succeeds + */ + if (fnic_clean_pending_aborts(fnic, sc)) { + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Device reset failed" + " since could not abort all IOs\n"); + goto fnic_device_reset_clean; + } + + /* Clean lun reset command */ + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (io_req) + /* Completed, and successful */ + ret = SUCCESS; + +fnic_device_reset_clean: + if (io_req) + CMD_SP(sc) = NULL; + + spin_unlock_irqrestore(io_lock, flags); + + if (io_req) { + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); + } + +fnic_device_reset_end: + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from device reset %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + return ret; +} + +/* Clean up all IOs, clean up libFC local port */ +int fnic_reset(struct Scsi_Host *shost) +{ + struct fc_lport *lp; + struct fnic *fnic; + int ret = SUCCESS; + + lp = shost_priv(shost); + fnic = lport_priv(lp); + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_reset called\n"); + + /* + * Reset local port, this will clean up libFC exchanges, + * reset remote port sessions, and if link is up, begin flogi + */ + if (lp->tt.lport_reset(lp)) + ret = FAILED; + + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Returning from fnic reset %s\n", + (ret == SUCCESS) ? + "SUCCESS" : "FAILED"); + + return ret; +} + +/* + * SCSI Error handling calls driver's eh_host_reset if all prior + * error handling levels return FAILED. If host reset completes + * successfully, and if link is up, then Fabric login begins. + * + * Host Reset is the highest level of error recovery. If this fails, then + * host is offlined by SCSI. + * + */ +int fnic_host_reset(struct scsi_cmnd *sc) +{ + int ret; + unsigned long wait_host_tmo; + struct Scsi_Host *shost = sc->device->host; + struct fc_lport *lp = shost_priv(shost); + + /* + * If fnic_reset is successful, wait for fabric login to complete + * scsi-ml tries to send a TUR to every device if host reset is + * successful, so before returning to scsi, fabric should be up + */ + ret = fnic_reset(shost); + if (ret == SUCCESS) { + wait_host_tmo = jiffies + FNIC_HOST_RESET_SETTLE_TIME * HZ; + ret = FAILED; + while (time_before(jiffies, wait_host_tmo)) { + if ((lp->state == LPORT_ST_READY) && + (lp->link_up)) { + ret = SUCCESS; + break; + } + ssleep(1); + } + } + + return ret; +} + +/* + * This fxn is called from libFC when host is removed + */ +void fnic_scsi_abort_io(struct fc_lport *lp) +{ + int err = 0; + unsigned long flags; + enum fnic_state old_state; + struct fnic *fnic = lport_priv(lp); + DECLARE_COMPLETION_ONSTACK(remove_wait); + + /* Issue firmware reset for fnic, wait for reset to complete */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->remove_wait = &remove_wait; + old_state = fnic->state; + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + err = fnic_fw_reset_handler(fnic); + if (err) { + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) + fnic->state = old_state; + fnic->remove_wait = NULL; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + return; + } + + /* Wait for firmware reset to complete */ + wait_for_completion_timeout(&remove_wait, + msecs_to_jiffies(FNIC_RMDEVICE_TIMEOUT)); + + spin_lock_irqsave(&fnic->fnic_lock, flags); + fnic->remove_wait = NULL; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_scsi_abort_io %s\n", + (fnic->state == FNIC_IN_ETH_MODE) ? + "SUCCESS" : "FAILED"); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + +} + +/* + * This fxn called from libFC to clean up driver IO state on link down + */ +void fnic_scsi_cleanup(struct fc_lport *lp) +{ + unsigned long flags; + enum fnic_state old_state; + struct fnic *fnic = lport_priv(lp); + + /* issue fw reset */ + spin_lock_irqsave(&fnic->fnic_lock, flags); + old_state = fnic->state; + fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; + vnic_dev_del_addr(fnic->vdev, fnic->data_src_addr); + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + + if (fnic_fw_reset_handler(fnic)) { + spin_lock_irqsave(&fnic->fnic_lock, flags); + if (fnic->state == FNIC_IN_FC_TRANS_ETH_MODE) + fnic->state = old_state; + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + } + +} + +void fnic_empty_scsi_cleanup(struct fc_lport *lp) +{ +} + +void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did) +{ + struct fnic *fnic = lport_priv(lp); + + /* Non-zero sid, nothing to do */ + if (sid) + goto call_fc_exch_mgr_reset; + + if (did) { + fnic_rport_exch_reset(fnic, did); + goto call_fc_exch_mgr_reset; + } + + /* + * sid = 0, did = 0 + * link down or device being removed + */ + if (!fnic->in_remove) + fnic_scsi_cleanup(lp); + else + fnic_scsi_abort_io(lp); + + /* call libFC exch mgr reset to reset its exchanges */ +call_fc_exch_mgr_reset: + fc_exch_mgr_reset(lp, sid, did); + +} diff --git a/drivers/scsi/fnic/rq_enet_desc.h b/drivers/scsi/fnic/rq_enet_desc.h new file mode 100644 index 000000000000..92e80ae6b725 --- /dev/null +++ b/drivers/scsi/fnic/rq_enet_desc.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _RQ_ENET_DESC_H_ +#define _RQ_ENET_DESC_H_ + +/* Ethernet receive queue descriptor: 16B */ +struct rq_enet_desc { + __le64 address; + __le16 length_type; + u8 reserved[6]; +}; + +enum rq_enet_type_types { + RQ_ENET_TYPE_ONLY_SOP = 0, + RQ_ENET_TYPE_NOT_SOP = 1, + RQ_ENET_TYPE_RESV2 = 2, + RQ_ENET_TYPE_RESV3 = 3, +}; + +#define RQ_ENET_ADDR_BITS 64 +#define RQ_ENET_LEN_BITS 14 +#define RQ_ENET_LEN_MASK ((1 << RQ_ENET_LEN_BITS) - 1) +#define RQ_ENET_TYPE_BITS 2 +#define RQ_ENET_TYPE_MASK ((1 << RQ_ENET_TYPE_BITS) - 1) + +static inline void rq_enet_desc_enc(struct rq_enet_desc *desc, + u64 address, u8 type, u16 length) +{ + desc->address = cpu_to_le64(address); + desc->length_type = cpu_to_le16((length & RQ_ENET_LEN_MASK) | + ((type & RQ_ENET_TYPE_MASK) << RQ_ENET_LEN_BITS)); +} + +static inline void rq_enet_desc_dec(struct rq_enet_desc *desc, + u64 *address, u8 *type, u16 *length) +{ + *address = le64_to_cpu(desc->address); + *length = le16_to_cpu(desc->length_type) & RQ_ENET_LEN_MASK; + *type = (u8)((le16_to_cpu(desc->length_type) >> RQ_ENET_LEN_BITS) & + RQ_ENET_TYPE_MASK); +} + +#endif /* _RQ_ENET_DESC_H_ */ diff --git a/drivers/scsi/fnic/vnic_cq.c b/drivers/scsi/fnic/vnic_cq.c new file mode 100644 index 000000000000..c5db32eda5ef --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq.c @@ -0,0 +1,85 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include "vnic_dev.h" +#include "vnic_cq.h" + +void vnic_cq_free(struct vnic_cq *cq) +{ + vnic_dev_free_desc_ring(cq->vdev, &cq->ring); + + cq->ctrl = NULL; +} + +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + cq->index = index; + cq->vdev = vdev; + + cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index); + if (!cq->ctrl) { + printk(KERN_ERR "Failed to hook CQ[%d] resource\n", index); + return -EINVAL; + } + + err = vnic_dev_alloc_desc_ring(vdev, &cq->ring, desc_count, desc_size); + if (err) + return err; + + return 0; +} + +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int cq_message_enable, + unsigned int interrupt_offset, u64 cq_message_addr) +{ + u64 paddr; + + paddr = (u64)cq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &cq->ctrl->ring_base); + iowrite32(cq->ring.desc_count, &cq->ctrl->ring_size); + iowrite32(flow_control_enable, &cq->ctrl->flow_control_enable); + iowrite32(color_enable, &cq->ctrl->color_enable); + iowrite32(cq_head, &cq->ctrl->cq_head); + iowrite32(cq_tail, &cq->ctrl->cq_tail); + iowrite32(cq_tail_color, &cq->ctrl->cq_tail_color); + iowrite32(interrupt_enable, &cq->ctrl->interrupt_enable); + iowrite32(cq_entry_enable, &cq->ctrl->cq_entry_enable); + iowrite32(cq_message_enable, &cq->ctrl->cq_message_enable); + iowrite32(interrupt_offset, &cq->ctrl->interrupt_offset); + writeq(cq_message_addr, &cq->ctrl->cq_message_addr); +} + +void vnic_cq_clean(struct vnic_cq *cq) +{ + cq->to_clean = 0; + cq->last_color = 0; + + iowrite32(0, &cq->ctrl->cq_head); + iowrite32(0, &cq->ctrl->cq_tail); + iowrite32(1, &cq->ctrl->cq_tail_color); + + vnic_dev_clear_desc_ring(&cq->ring); +} diff --git a/drivers/scsi/fnic/vnic_cq.h b/drivers/scsi/fnic/vnic_cq.h new file mode 100644 index 000000000000..4ede6809fb1e --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq.h @@ -0,0 +1,121 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_CQ_H_ +#define _VNIC_CQ_H_ + +#include "cq_desc.h" +#include "vnic_dev.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_cq_service fnic_cq_service +#define vnic_cq_free fnic_cq_free +#define vnic_cq_alloc fnic_cq_alloc +#define vnic_cq_init fnic_cq_init +#define vnic_cq_clean fnic_cq_clean + +/* Completion queue control */ +struct vnic_cq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 flow_control_enable; /* 0x10 */ + u32 pad1; + u32 color_enable; /* 0x18 */ + u32 pad2; + u32 cq_head; /* 0x20 */ + u32 pad3; + u32 cq_tail; /* 0x28 */ + u32 pad4; + u32 cq_tail_color; /* 0x30 */ + u32 pad5; + u32 interrupt_enable; /* 0x38 */ + u32 pad6; + u32 cq_entry_enable; /* 0x40 */ + u32 pad7; + u32 cq_message_enable; /* 0x48 */ + u32 pad8; + u32 interrupt_offset; /* 0x50 */ + u32 pad9; + u64 cq_message_addr; /* 0x58 */ + u32 pad10; +}; + +struct vnic_cq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_cq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + unsigned int to_clean; + unsigned int last_color; +}; + +static inline unsigned int vnic_cq_service(struct vnic_cq *cq, + unsigned int work_to_do, + int (*q_service)(struct vnic_dev *vdev, struct cq_desc *cq_desc, + u8 type, u16 q_number, u16 completed_index, void *opaque), + void *opaque) +{ + struct cq_desc *cq_desc; + unsigned int work_done = 0; + u16 q_number, completed_index; + u8 type, color; + + cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + cq_desc_dec(cq_desc, &type, &color, + &q_number, &completed_index); + + while (color != cq->last_color) { + + if ((*q_service)(cq->vdev, cq_desc, type, + q_number, completed_index, opaque)) + break; + + cq->to_clean++; + if (cq->to_clean == cq->ring.desc_count) { + cq->to_clean = 0; + cq->last_color = cq->last_color ? 0 : 1; + } + + cq_desc = (struct cq_desc *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + cq_desc_dec(cq_desc, &type, &color, + &q_number, &completed_index); + + work_done++; + if (work_done >= work_to_do) + break; + } + + return work_done; +} + +void vnic_cq_free(struct vnic_cq *cq); +int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_cq_init(struct vnic_cq *cq, unsigned int flow_control_enable, + unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, + unsigned int cq_tail_color, unsigned int interrupt_enable, + unsigned int cq_entry_enable, unsigned int message_enable, + unsigned int interrupt_offset, u64 message_addr); +void vnic_cq_clean(struct vnic_cq *cq); + +#endif /* _VNIC_CQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_cq_copy.h b/drivers/scsi/fnic/vnic_cq_copy.h new file mode 100644 index 000000000000..7901ce255a81 --- /dev/null +++ b/drivers/scsi/fnic/vnic_cq_copy.h @@ -0,0 +1,62 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_CQ_COPY_H_ +#define _VNIC_CQ_COPY_H_ + +#include "fcpio.h" + +static inline unsigned int vnic_cq_copy_service( + struct vnic_cq *cq, + int (*q_service)(struct vnic_dev *vdev, + unsigned int index, + struct fcpio_fw_req *desc), + unsigned int work_to_do) + +{ + struct fcpio_fw_req *desc; + unsigned int work_done = 0; + u8 color; + + desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + fcpio_color_dec(desc, &color); + + while (color != cq->last_color) { + + if ((*q_service)(cq->vdev, cq->index, desc)) + break; + + cq->to_clean++; + if (cq->to_clean == cq->ring.desc_count) { + cq->to_clean = 0; + cq->last_color = cq->last_color ? 0 : 1; + } + + desc = (struct fcpio_fw_req *)((u8 *)cq->ring.descs + + cq->ring.desc_size * cq->to_clean); + fcpio_color_dec(desc, &color); + + work_done++; + if (work_done >= work_to_do) + break; + } + + return work_done; +} + +#endif /* _VNIC_CQ_COPY_H_ */ diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c new file mode 100644 index 000000000000..566770645086 --- /dev/null +++ b/drivers/scsi/fnic/vnic_dev.c @@ -0,0 +1,690 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/if_ether.h> +#include "vnic_resource.h" +#include "vnic_devcmd.h" +#include "vnic_dev.h" +#include "vnic_stats.h" + +struct vnic_res { + void __iomem *vaddr; + unsigned int count; +}; + +struct vnic_dev { + void *priv; + struct pci_dev *pdev; + struct vnic_res res[RES_TYPE_MAX]; + enum vnic_dev_intr_mode intr_mode; + struct vnic_devcmd __iomem *devcmd; + struct vnic_devcmd_notify *notify; + struct vnic_devcmd_notify notify_copy; + dma_addr_t notify_pa; + u32 *linkstatus; + dma_addr_t linkstatus_pa; + struct vnic_stats *stats; + dma_addr_t stats_pa; + struct vnic_devcmd_fw_info *fw_info; + dma_addr_t fw_info_pa; +}; + +#define VNIC_MAX_RES_HDR_SIZE \ + (sizeof(struct vnic_resource_header) + \ + sizeof(struct vnic_resource) * RES_TYPE_MAX) +#define VNIC_RES_STRIDE 128 + +void *vnic_dev_priv(struct vnic_dev *vdev) +{ + return vdev->priv; +} + +static int vnic_dev_discover_res(struct vnic_dev *vdev, + struct vnic_dev_bar *bar) +{ + struct vnic_resource_header __iomem *rh; + struct vnic_resource __iomem *r; + u8 type; + + if (bar->len < VNIC_MAX_RES_HDR_SIZE) { + printk(KERN_ERR "vNIC BAR0 res hdr length error\n"); + return -EINVAL; + } + + rh = bar->vaddr; + if (!rh) { + printk(KERN_ERR "vNIC BAR0 res hdr not mem-mapped\n"); + return -EINVAL; + } + + if (ioread32(&rh->magic) != VNIC_RES_MAGIC || + ioread32(&rh->version) != VNIC_RES_VERSION) { + printk(KERN_ERR "vNIC BAR0 res magic/version error " + "exp (%lx/%lx) curr (%x/%x)\n", + VNIC_RES_MAGIC, VNIC_RES_VERSION, + ioread32(&rh->magic), ioread32(&rh->version)); + return -EINVAL; + } + + r = (struct vnic_resource __iomem *)(rh + 1); + + while ((type = ioread8(&r->type)) != RES_TYPE_EOL) { + + u8 bar_num = ioread8(&r->bar); + u32 bar_offset = ioread32(&r->bar_offset); + u32 count = ioread32(&r->count); + u32 len; + + r++; + + if (bar_num != 0) /* only mapping in BAR0 resources */ + continue; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + /* each count is stride bytes long */ + len = count * VNIC_RES_STRIDE; + if (len + bar_offset > bar->len) { + printk(KERN_ERR "vNIC BAR0 resource %d " + "out-of-bounds, offset 0x%x + " + "size 0x%x > bar len 0x%lx\n", + type, bar_offset, + len, + bar->len); + return -EINVAL; + } + break; + case RES_TYPE_INTR_PBA_LEGACY: + case RES_TYPE_DEVCMD: + len = count; + break; + default: + continue; + } + + vdev->res[type].count = count; + vdev->res[type].vaddr = (char __iomem *)bar->vaddr + bar_offset; + } + + return 0; +} + +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type) +{ + return vdev->res[type].count; +} + +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index) +{ + if (!vdev->res[type].vaddr) + return NULL; + + switch (type) { + case RES_TYPE_WQ: + case RES_TYPE_RQ: + case RES_TYPE_CQ: + case RES_TYPE_INTR_CTRL: + return (char __iomem *)vdev->res[type].vaddr + + index * VNIC_RES_STRIDE; + default: + return (char __iomem *)vdev->res[type].vaddr; + } +} + +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, + unsigned int desc_size) +{ + /* The base address of the desc rings must be 512 byte aligned. + * Descriptor count is aligned to groups of 32 descriptors. A + * count of 0 means the maximum 4096 descriptors. Descriptor + * size is aligned to 16 bytes. + */ + + unsigned int count_align = 32; + unsigned int desc_align = 16; + + ring->base_align = 512; + + if (desc_count == 0) + desc_count = 4096; + + ring->desc_count = ALIGN(desc_count, count_align); + + ring->desc_size = ALIGN(desc_size, desc_align); + + ring->size = ring->desc_count * ring->desc_size; + ring->size_unaligned = ring->size + ring->base_align; + + return ring->size_unaligned; +} + +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) +{ + memset(ring->descs, 0, ring->size); +} + +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size) +{ + vnic_dev_desc_ring_size(ring, desc_count, desc_size); + + ring->descs_unaligned = pci_alloc_consistent(vdev->pdev, + ring->size_unaligned, + &ring->base_addr_unaligned); + + if (!ring->descs_unaligned) { + printk(KERN_ERR + "Failed to allocate ring (size=%d), aborting\n", + (int)ring->size); + return -ENOMEM; + } + + ring->base_addr = ALIGN(ring->base_addr_unaligned, + ring->base_align); + ring->descs = (u8 *)ring->descs_unaligned + + (ring->base_addr - ring->base_addr_unaligned); + + vnic_dev_clear_desc_ring(ring); + + ring->desc_avail = ring->desc_count - 1; + + return 0; +} + +void vnic_dev_free_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring) +{ + if (ring->descs) { + pci_free_consistent(vdev->pdev, + ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); + ring->descs = NULL; + } +} + +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + u64 *a0, u64 *a1, int wait) +{ + struct vnic_devcmd __iomem *devcmd = vdev->devcmd; + int delay; + u32 status; + int dev_cmd_err[] = { + /* convert from fw's version of error.h to host's version */ + 0, /* ERR_SUCCESS */ + EINVAL, /* ERR_EINVAL */ + EFAULT, /* ERR_EFAULT */ + EPERM, /* ERR_EPERM */ + EBUSY, /* ERR_EBUSY */ + }; + int err; + + status = ioread32(&devcmd->status); + if (status & STAT_BUSY) { + printk(KERN_ERR "Busy devcmd %d\n", _CMD_N(cmd)); + return -EBUSY; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_WRITE) { + writeq(*a0, &devcmd->args[0]); + writeq(*a1, &devcmd->args[1]); + wmb(); + } + + iowrite32(cmd, &devcmd->cmd); + + if ((_CMD_FLAGS(cmd) & _CMD_FLAGS_NOWAIT)) + return 0; + + for (delay = 0; delay < wait; delay++) { + + udelay(100); + + status = ioread32(&devcmd->status); + if (!(status & STAT_BUSY)) { + + if (status & STAT_ERROR) { + err = dev_cmd_err[(int)readq(&devcmd->args[0])]; + printk(KERN_ERR "Error %d devcmd %d\n", + err, _CMD_N(cmd)); + return -err; + } + + if (_CMD_DIR(cmd) & _CMD_DIR_READ) { + rmb(); + *a0 = readq(&devcmd->args[0]); + *a1 = readq(&devcmd->args[1]); + } + + return 0; + } + } + + printk(KERN_ERR "Timedout devcmd %d\n", _CMD_N(cmd)); + return -ETIMEDOUT; +} + +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info) +{ + u64 a0, a1 = 0; + int wait = 1000; + int err = 0; + + if (!vdev->fw_info) { + vdev->fw_info = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_fw_info), + &vdev->fw_info_pa); + if (!vdev->fw_info) + return -ENOMEM; + + a0 = vdev->fw_info_pa; + + /* only get fw_info once and cache it */ + err = vnic_dev_cmd(vdev, CMD_MCPU_FW_INFO, &a0, &a1, wait); + } + + *fw_info = vdev->fw_info; + + return err; +} + +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, unsigned int size, + void *value) +{ + u64 a0, a1; + int wait = 1000; + int err; + + a0 = offset; + a1 = size; + + err = vnic_dev_cmd(vdev, CMD_DEV_SPEC, &a0, &a1, wait); + + switch (size) { + case 1: + *(u8 *)value = (u8)a0; + break; + case 2: + *(u16 *)value = (u16)a0; + break; + case 4: + *(u32 *)value = (u32)a0; + break; + case 8: + *(u64 *)value = a0; + break; + default: + BUG(); + break; + } + + return err; +} + +int vnic_dev_stats_clear(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_STATS_CLEAR, &a0, &a1, wait); +} + +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) +{ + u64 a0, a1; + int wait = 1000; + + if (!vdev->stats) { + vdev->stats = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_stats), &vdev->stats_pa); + if (!vdev->stats) + return -ENOMEM; + } + + *stats = vdev->stats; + a0 = vdev->stats_pa; + a1 = sizeof(struct vnic_stats); + + return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait); +} + +int vnic_dev_close(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_CLOSE, &a0, &a1, wait); +} + +int vnic_dev_enable(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_ENABLE, &a0, &a1, wait); +} + +int vnic_dev_disable(struct vnic_dev *vdev) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_DISABLE, &a0, &a1, wait); +} + +int vnic_dev_open(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_OPEN, &a0, &a1, wait); +} + +int vnic_dev_open_done(struct vnic_dev *vdev, int *done) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + + *done = 0; + + err = vnic_dev_cmd(vdev, CMD_OPEN_STATUS, &a0, &a1, wait); + if (err) + return err; + + *done = (a0 == 0); + + return 0; +} + +int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_SOFT_RESET, &a0, &a1, wait); +} + +int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + + *done = 0; + + err = vnic_dev_cmd(vdev, CMD_SOFT_RESET_STATUS, &a0, &a1, wait); + if (err) + return err; + + *done = (a0 == 0); + + return 0; +} + +int vnic_dev_hang_notify(struct vnic_dev *vdev) +{ + u64 a0, a1; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait); +} + +int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) +{ + u64 a0, a1; + int wait = 1000; + int err, i; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = 0; + + err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait); + if (err) + return err; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = ((u8 *)&a0)[i]; + + return 0; +} + +void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti) +{ + u64 a0, a1 = 0; + int wait = 1000; + int err; + + a0 = (directed ? CMD_PFILTER_DIRECTED : 0) | + (multicast ? CMD_PFILTER_MULTICAST : 0) | + (broadcast ? CMD_PFILTER_BROADCAST : 0) | + (promisc ? CMD_PFILTER_PROMISCUOUS : 0) | + (allmulti ? CMD_PFILTER_ALL_MULTICAST : 0); + + err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); + if (err) + printk(KERN_ERR "Can't set packet filter\n"); +} + +void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < ETH_ALEN; i++) + ((u8 *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + if (err) + printk(KERN_ERR + "Can't add addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); +} + +void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr) +{ + u64 a0 = 0, a1 = 0; + int wait = 1000; + int err; + int i; + + for (i = 0; i < ETH_ALEN; i++) + ((u8 *)&a0)[i] = addr[i]; + + err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); + if (err) + printk(KERN_ERR + "Can't del addr [%02x:%02x:%02x:%02x:%02x:%02x], %d\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], + err); +} + +int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr) +{ + u64 a0, a1; + int wait = 1000; + + if (!vdev->notify) { + vdev->notify = pci_alloc_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_notify), + &vdev->notify_pa); + if (!vdev->notify) + return -ENOMEM; + } + + a0 = vdev->notify_pa; + a1 = ((u64)intr << 32) & 0x0000ffff00000000ULL; + a1 += sizeof(struct vnic_devcmd_notify); + + return vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); +} + +void vnic_dev_notify_unset(struct vnic_dev *vdev) +{ + u64 a0, a1; + int wait = 1000; + + a0 = 0; /* paddr = 0 to unset notify buffer */ + a1 = 0x0000ffff00000000ULL; /* intr num = -1 to unreg for intr */ + a1 += sizeof(struct vnic_devcmd_notify); + + vnic_dev_cmd(vdev, CMD_NOTIFY, &a0, &a1, wait); +} + +static int vnic_dev_notify_ready(struct vnic_dev *vdev) +{ + u32 *words; + unsigned int nwords = sizeof(struct vnic_devcmd_notify) / 4; + unsigned int i; + u32 csum; + + if (!vdev->notify) + return 0; + + do { + csum = 0; + memcpy(&vdev->notify_copy, vdev->notify, + sizeof(struct vnic_devcmd_notify)); + words = (u32 *)&vdev->notify_copy; + for (i = 1; i < nwords; i++) + csum += words[i]; + } while (csum != words[0]); + + return 1; +} + +int vnic_dev_init(struct vnic_dev *vdev, int arg) +{ + u64 a0 = (u32)arg, a1 = 0; + int wait = 1000; + return vnic_dev_cmd(vdev, CMD_INIT, &a0, &a1, wait); +} + +int vnic_dev_link_status(struct vnic_dev *vdev) +{ + if (vdev->linkstatus) + return *vdev->linkstatus; + + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.link_state; +} + +u32 vnic_dev_port_speed(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.port_speed; +} + +u32 vnic_dev_msg_lvl(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.msglvl; +} + +u32 vnic_dev_mtu(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.mtu; +} + +u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev) +{ + if (!vnic_dev_notify_ready(vdev)) + return 0; + + return vdev->notify_copy.link_down_cnt; +} + +void vnic_dev_set_intr_mode(struct vnic_dev *vdev, + enum vnic_dev_intr_mode intr_mode) +{ + vdev->intr_mode = intr_mode; +} + +enum vnic_dev_intr_mode vnic_dev_get_intr_mode( + struct vnic_dev *vdev) +{ + return vdev->intr_mode; +} + +void vnic_dev_unregister(struct vnic_dev *vdev) +{ + if (vdev) { + if (vdev->notify) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_notify), + vdev->notify, + vdev->notify_pa); + if (vdev->linkstatus) + pci_free_consistent(vdev->pdev, + sizeof(u32), + vdev->linkstatus, + vdev->linkstatus_pa); + if (vdev->stats) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_dev), + vdev->stats, vdev->stats_pa); + if (vdev->fw_info) + pci_free_consistent(vdev->pdev, + sizeof(struct vnic_devcmd_fw_info), + vdev->fw_info, vdev->fw_info_pa); + kfree(vdev); + } +} + +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct pci_dev *pdev, struct vnic_dev_bar *bar) +{ + if (!vdev) { + vdev = kzalloc(sizeof(struct vnic_dev), GFP_KERNEL); + if (!vdev) + return NULL; + } + + vdev->priv = priv; + vdev->pdev = pdev; + + if (vnic_dev_discover_res(vdev, bar)) + goto err_out; + + vdev->devcmd = vnic_dev_get_res(vdev, RES_TYPE_DEVCMD, 0); + if (!vdev->devcmd) + goto err_out; + + return vdev; + +err_out: + vnic_dev_unregister(vdev); + return NULL; +} diff --git a/drivers/scsi/fnic/vnic_dev.h b/drivers/scsi/fnic/vnic_dev.h new file mode 100644 index 000000000000..f9935a8a5a09 --- /dev/null +++ b/drivers/scsi/fnic/vnic_dev.h @@ -0,0 +1,161 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_DEV_H_ +#define _VNIC_DEV_H_ + +#include "vnic_resource.h" +#include "vnic_devcmd.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_dev_priv fnic_dev_priv +#define vnic_dev_get_res_count fnic_dev_get_res_count +#define vnic_dev_get_res fnic_dev_get_res +#define vnic_dev_desc_ring_size fnic_dev_desc_ring_siz +#define vnic_dev_clear_desc_ring fnic_dev_clear_desc_ring +#define vnic_dev_alloc_desc_ring fnic_dev_alloc_desc_ring +#define vnic_dev_free_desc_ring fnic_dev_free_desc_ring +#define vnic_dev_cmd fnic_dev_cmd +#define vnic_dev_fw_info fnic_dev_fw_info +#define vnic_dev_spec fnic_dev_spec +#define vnic_dev_stats_clear fnic_dev_stats_clear +#define vnic_dev_stats_dump fnic_dev_stats_dump +#define vnic_dev_hang_notify fnic_dev_hang_notify +#define vnic_dev_packet_filter fnic_dev_packet_filter +#define vnic_dev_add_addr fnic_dev_add_addr +#define vnic_dev_del_addr fnic_dev_del_addr +#define vnic_dev_mac_addr fnic_dev_mac_addr +#define vnic_dev_notify_set fnic_dev_notify_set +#define vnic_dev_notify_unset fnic_dev_notify_unset +#define vnic_dev_link_status fnic_dev_link_status +#define vnic_dev_port_speed fnic_dev_port_speed +#define vnic_dev_msg_lvl fnic_dev_msg_lvl +#define vnic_dev_mtu fnic_dev_mtu +#define vnic_dev_link_down_cnt fnic_dev_link_down_cnt +#define vnic_dev_close fnic_dev_close +#define vnic_dev_enable fnic_dev_enable +#define vnic_dev_disable fnic_dev_disable +#define vnic_dev_open fnic_dev_open +#define vnic_dev_open_done fnic_dev_open_done +#define vnic_dev_init fnic_dev_init +#define vnic_dev_soft_reset fnic_dev_soft_reset +#define vnic_dev_soft_reset_done fnic_dev_soft_reset_done +#define vnic_dev_set_intr_mode fnic_dev_set_intr_mode +#define vnic_dev_get_intr_mode fnic_dev_get_intr_mode +#define vnic_dev_unregister fnic_dev_unregister +#define vnic_dev_register fnic_dev_register + +#ifndef VNIC_PADDR_TARGET +#define VNIC_PADDR_TARGET 0x0000000000000000ULL +#endif + +#ifndef readq +static inline u64 readq(void __iomem *reg) +{ + return ((u64)readl(reg + 0x4UL) << 32) | (u64)readl(reg); +} + +static inline void writeq(u64 val, void __iomem *reg) +{ + writel(val & 0xffffffff, reg); + writel(val >> 32, reg + 0x4UL); +} +#endif + +enum vnic_dev_intr_mode { + VNIC_DEV_INTR_MODE_UNKNOWN, + VNIC_DEV_INTR_MODE_INTX, + VNIC_DEV_INTR_MODE_MSI, + VNIC_DEV_INTR_MODE_MSIX, +}; + +struct vnic_dev_bar { + void __iomem *vaddr; + dma_addr_t bus_addr; + unsigned long len; +}; + +struct vnic_dev_ring { + void *descs; + size_t size; + dma_addr_t base_addr; + size_t base_align; + void *descs_unaligned; + size_t size_unaligned; + dma_addr_t base_addr_unaligned; + unsigned int desc_size; + unsigned int desc_count; + unsigned int desc_avail; +}; + +struct vnic_dev; +struct vnic_stats; + +void *vnic_dev_priv(struct vnic_dev *vdev); +unsigned int vnic_dev_get_res_count(struct vnic_dev *vdev, + enum vnic_res_type type); +void __iomem *vnic_dev_get_res(struct vnic_dev *vdev, enum vnic_res_type type, + unsigned int index); +unsigned int vnic_dev_desc_ring_size(struct vnic_dev_ring *ring, + unsigned int desc_count, + unsigned int desc_size); +void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring); +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, + unsigned int desc_count, unsigned int desc_size); +void vnic_dev_free_desc_ring(struct vnic_dev *vdev, + struct vnic_dev_ring *ring); +int vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, + u64 *a0, u64 *a1, int wait); +int vnic_dev_fw_info(struct vnic_dev *vdev, + struct vnic_devcmd_fw_info **fw_info); +int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, + unsigned int size, void *value); +int vnic_dev_stats_clear(struct vnic_dev *vdev); +int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats); +int vnic_dev_hang_notify(struct vnic_dev *vdev); +void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, + int broadcast, int promisc, int allmulti); +void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr); +void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr); +int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr); +int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr); +void vnic_dev_notify_unset(struct vnic_dev *vdev); +int vnic_dev_link_status(struct vnic_dev *vdev); +u32 vnic_dev_port_speed(struct vnic_dev *vdev); +u32 vnic_dev_msg_lvl(struct vnic_dev *vdev); +u32 vnic_dev_mtu(struct vnic_dev *vdev); +u32 vnic_dev_link_down_cnt(struct vnic_dev *vdev); +int vnic_dev_close(struct vnic_dev *vdev); +int vnic_dev_enable(struct vnic_dev *vdev); +int vnic_dev_disable(struct vnic_dev *vdev); +int vnic_dev_open(struct vnic_dev *vdev, int arg); +int vnic_dev_open_done(struct vnic_dev *vdev, int *done); +int vnic_dev_init(struct vnic_dev *vdev, int arg); +int vnic_dev_soft_reset(struct vnic_dev *vdev, int arg); +int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done); +void vnic_dev_set_intr_mode(struct vnic_dev *vdev, + enum vnic_dev_intr_mode intr_mode); +enum vnic_dev_intr_mode vnic_dev_get_intr_mode(struct vnic_dev *vdev); +void vnic_dev_unregister(struct vnic_dev *vdev); +struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, + void *priv, struct pci_dev *pdev, + struct vnic_dev_bar *bar); + +#endif /* _VNIC_DEV_H_ */ diff --git a/drivers/scsi/fnic/vnic_devcmd.h b/drivers/scsi/fnic/vnic_devcmd.h new file mode 100644 index 000000000000..d62b9061bf12 --- /dev/null +++ b/drivers/scsi/fnic/vnic_devcmd.h @@ -0,0 +1,281 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_DEVCMD_H_ +#define _VNIC_DEVCMD_H_ + +#define _CMD_NBITS 14 +#define _CMD_VTYPEBITS 10 +#define _CMD_FLAGSBITS 6 +#define _CMD_DIRBITS 2 + +#define _CMD_NMASK ((1 << _CMD_NBITS)-1) +#define _CMD_VTYPEMASK ((1 << _CMD_VTYPEBITS)-1) +#define _CMD_FLAGSMASK ((1 << _CMD_FLAGSBITS)-1) +#define _CMD_DIRMASK ((1 << _CMD_DIRBITS)-1) + +#define _CMD_NSHIFT 0 +#define _CMD_VTYPESHIFT (_CMD_NSHIFT+_CMD_NBITS) +#define _CMD_FLAGSSHIFT (_CMD_VTYPESHIFT+_CMD_VTYPEBITS) +#define _CMD_DIRSHIFT (_CMD_FLAGSSHIFT+_CMD_FLAGSBITS) + +/* + * Direction bits (from host perspective). + */ +#define _CMD_DIR_NONE 0U +#define _CMD_DIR_WRITE 1U +#define _CMD_DIR_READ 2U +#define _CMD_DIR_RW (_CMD_DIR_WRITE | _CMD_DIR_READ) + +/* + * Flag bits. + */ +#define _CMD_FLAGS_NONE 0U +#define _CMD_FLAGS_NOWAIT 1U + +/* + * vNIC type bits. + */ +#define _CMD_VTYPE_NONE 0U +#define _CMD_VTYPE_ENET 1U +#define _CMD_VTYPE_FC 2U +#define _CMD_VTYPE_SCSI 4U +#define _CMD_VTYPE_ALL (_CMD_VTYPE_ENET | _CMD_VTYPE_FC | _CMD_VTYPE_SCSI) + +/* + * Used to create cmds.. +*/ +#define _CMDCF(dir, flags, vtype, nr) \ + (((dir) << _CMD_DIRSHIFT) | \ + ((flags) << _CMD_FLAGSSHIFT) | \ + ((vtype) << _CMD_VTYPESHIFT) | \ + ((nr) << _CMD_NSHIFT)) +#define _CMDC(dir, vtype, nr) _CMDCF(dir, 0, vtype, nr) +#define _CMDCNW(dir, vtype, nr) _CMDCF(dir, _CMD_FLAGS_NOWAIT, vtype, nr) + +/* + * Used to decode cmds.. +*/ +#define _CMD_DIR(cmd) (((cmd) >> _CMD_DIRSHIFT) & _CMD_DIRMASK) +#define _CMD_FLAGS(cmd) (((cmd) >> _CMD_FLAGSSHIFT) & _CMD_FLAGSMASK) +#define _CMD_VTYPE(cmd) (((cmd) >> _CMD_VTYPESHIFT) & _CMD_VTYPEMASK) +#define _CMD_N(cmd) (((cmd) >> _CMD_NSHIFT) & _CMD_NMASK) + +enum vnic_devcmd_cmd { + CMD_NONE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_NONE, 0), + + /* mcpu fw info in mem: (u64)a0=paddr to struct vnic_devcmd_fw_info */ + CMD_MCPU_FW_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 1), + + /* dev-specific block member: + * in: (u16)a0=offset,(u8)a1=size + * out: a0=value */ + CMD_DEV_SPEC = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 2), + + /* stats clear */ + CMD_STATS_CLEAR = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 3), + + /* stats dump in mem: (u64)a0=paddr to stats area, + * (u16)a1=sizeof stats area */ + CMD_STATS_DUMP = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 4), + + /* set Rx packet filter: (u32)a0=filters (see CMD_PFILTER_*) */ + CMD_PACKET_FILTER = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 7), + + /* hang detection notification */ + CMD_HANG_NOTIFY = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 8), + + /* MAC address in (u48)a0 */ + CMD_MAC_ADDR = _CMDC(_CMD_DIR_READ, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 9), + + /* disable/enable promisc mode: (u8)a0=0/1 */ +/***** XXX DEPRECATED *****/ + CMD_PROMISC_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 10), + + /* disable/enable all-multi mode: (u8)a0=0/1 */ +/***** XXX DEPRECATED *****/ + CMD_ALLMULTI_MODE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 11), + + /* add addr from (u48)a0 */ + CMD_ADDR_ADD = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 12), + + /* del addr from (u48)a0 */ + CMD_ADDR_DEL = _CMDCNW(_CMD_DIR_WRITE, + _CMD_VTYPE_ENET | _CMD_VTYPE_FC, 13), + + /* add VLAN id in (u16)a0 */ + CMD_VLAN_ADD = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 14), + + /* del VLAN id in (u16)a0 */ + CMD_VLAN_DEL = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 15), + + /* nic_cfg in (u32)a0 */ + CMD_NIC_CFG = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 16), + + /* union vnic_rss_key in mem: (u64)a0=paddr, (u16)a1=len */ + CMD_RSS_KEY = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 17), + + /* union vnic_rss_cpu in mem: (u64)a0=paddr, (u16)a1=len */ + CMD_RSS_CPU = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 18), + + /* initiate softreset */ + CMD_SOFT_RESET = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 19), + + /* softreset status: + * out: a0=0 reset complete, a0=1 reset in progress */ + CMD_SOFT_RESET_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 20), + + /* set struct vnic_devcmd_notify buffer in mem: + * in: + * (u64)a0=paddr to notify (set paddr=0 to unset) + * (u32)a1 & 0x00000000ffffffff=sizeof(struct vnic_devcmd_notify) + * (u16)a1 & 0x0000ffff00000000=intr num (-1 for no intr) + * out: + * (u32)a1 = effective size + */ + CMD_NOTIFY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ALL, 21), + + /* UNDI API: (u64)a0=paddr to s_PXENV_UNDI_ struct, + * (u8)a1=PXENV_UNDI_xxx */ + CMD_UNDI = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 22), + + /* initiate open sequence (u32)a0=flags (see CMD_OPENF_*) */ + CMD_OPEN = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 23), + + /* open status: + * out: a0=0 open complete, a0=1 open in progress */ + CMD_OPEN_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 24), + + /* close vnic */ + CMD_CLOSE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 25), + + /* initialize virtual link: (u32)a0=flags (see CMD_INITF_*) */ + CMD_INIT = _CMDCNW(_CMD_DIR_READ, _CMD_VTYPE_ALL, 26), + + /* variant of CMD_INIT, with provisioning info + * (u64)a0=paddr of vnic_devcmd_provinfo + * (u32)a1=sizeof provision info */ + CMD_INIT_PROV_INFO = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 27), + + /* enable virtual link */ + CMD_ENABLE = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 28), + + /* disable virtual link */ + CMD_DISABLE = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 29), + + /* stats dump all vnics on uplink in mem: (u64)a0=paddr (u32)a1=uif */ + CMD_STATS_DUMP_ALL = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ALL, 30), + + /* init status: + * out: a0=0 init complete, a0=1 init in progress + * if a0=0, a1=errno */ + CMD_INIT_STATUS = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ALL, 31), + + /* INT13 API: (u64)a0=paddr to vnic_int13_params struct + * (u8)a1=INT13_CMD_xxx */ + CMD_INT13 = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_FC, 32), + + /* logical uplink enable/disable: (u64)a0: 0/1=disable/enable */ + CMD_LOGICAL_UPLINK = _CMDCNW(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 33), + + /* undo initialize of virtual link */ + CMD_DEINIT = _CMDCNW(_CMD_DIR_NONE, _CMD_VTYPE_ALL, 34), +}; + +/* flags for CMD_OPEN */ +#define CMD_OPENF_OPROM 0x1 /* open coming from option rom */ + +/* flags for CMD_INIT */ +#define CMD_INITF_DEFAULT_MAC 0x1 /* init with default mac addr */ + +/* flags for CMD_PACKET_FILTER */ +#define CMD_PFILTER_DIRECTED 0x01 +#define CMD_PFILTER_MULTICAST 0x02 +#define CMD_PFILTER_BROADCAST 0x04 +#define CMD_PFILTER_PROMISCUOUS 0x08 +#define CMD_PFILTER_ALL_MULTICAST 0x10 + +enum vnic_devcmd_status { + STAT_NONE = 0, + STAT_BUSY = 1 << 0, /* cmd in progress */ + STAT_ERROR = 1 << 1, /* last cmd caused error (code in a0) */ +}; + +enum vnic_devcmd_error { + ERR_SUCCESS = 0, + ERR_EINVAL = 1, + ERR_EFAULT = 2, + ERR_EPERM = 3, + ERR_EBUSY = 4, + ERR_ECMDUNKNOWN = 5, + ERR_EBADSTATE = 6, + ERR_ENOMEM = 7, + ERR_ETIMEDOUT = 8, + ERR_ELINKDOWN = 9, +}; + +struct vnic_devcmd_fw_info { + char fw_version[32]; + char fw_build[32]; + char hw_version[32]; + char hw_serial_number[32]; +}; + +struct vnic_devcmd_notify { + u32 csum; /* checksum over following words */ + + u32 link_state; /* link up == 1 */ + u32 port_speed; /* effective port speed (rate limit) */ + u32 mtu; /* MTU */ + u32 msglvl; /* requested driver msg lvl */ + u32 uif; /* uplink interface */ + u32 status; /* status bits (see VNIC_STF_*) */ + u32 error; /* error code (see ERR_*) for first ERR */ + u32 link_down_cnt; /* running count of link down transitions */ +}; +#define VNIC_STF_FATAL_ERR 0x0001 /* fatal fw error */ + +struct vnic_devcmd_provinfo { + u8 oui[3]; + u8 type; + u8 data[0]; +}; + +/* + * Writing cmd register causes STAT_BUSY to get set in status register. + * When cmd completes, STAT_BUSY will be cleared. + * + * If cmd completed successfully STAT_ERROR will be clear + * and args registers contain cmd-specific results. + * + * If cmd error, STAT_ERROR will be set and args[0] contains error code. + * + * status register is read-only. While STAT_BUSY is set, + * all other register contents are read-only. + */ + +/* Make sizeof(vnic_devcmd) a power-of-2 for I/O BAR. */ +#define VNIC_DEVCMD_NARGS 15 +struct vnic_devcmd { + u32 status; /* RO */ + u32 cmd; /* RW */ + u64 args[VNIC_DEVCMD_NARGS]; /* RW cmd args (little-endian) */ +}; + +#endif /* _VNIC_DEVCMD_H_ */ diff --git a/drivers/scsi/fnic/vnic_intr.c b/drivers/scsi/fnic/vnic_intr.c new file mode 100644 index 000000000000..4f4dc8793d23 --- /dev/null +++ b/drivers/scsi/fnic/vnic_intr.c @@ -0,0 +1,60 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include "vnic_dev.h" +#include "vnic_intr.h" + +void vnic_intr_free(struct vnic_intr *intr) +{ + intr->ctrl = NULL; +} + +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index) +{ + intr->index = index; + intr->vdev = vdev; + + intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index); + if (!intr->ctrl) { + printk(KERN_ERR "Failed to hook INTR[%d].ctrl resource\n", + index); + return -EINVAL; + } + + return 0; +} + +void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion) +{ + iowrite32(coalescing_timer, &intr->ctrl->coalescing_timer); + iowrite32(coalescing_type, &intr->ctrl->coalescing_type); + iowrite32(mask_on_assertion, &intr->ctrl->mask_on_assertion); + iowrite32(0, &intr->ctrl->int_credits); +} + +void vnic_intr_clean(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->int_credits); +} diff --git a/drivers/scsi/fnic/vnic_intr.h b/drivers/scsi/fnic/vnic_intr.h new file mode 100644 index 000000000000..d5fb40e7c98e --- /dev/null +++ b/drivers/scsi/fnic/vnic_intr.h @@ -0,0 +1,118 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_INTR_H_ +#define _VNIC_INTR_H_ + +#include <linux/pci.h> +#include "vnic_dev.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_intr_unmask fnic_intr_unmask +#define vnic_intr_mask fnic_intr_mask +#define vnic_intr_return_credits fnic_intr_return_credits +#define vnic_intr_credits fnic_intr_credits +#define vnic_intr_return_all_credits fnic_intr_return_all_credits +#define vnic_intr_legacy_pba fnic_intr_legacy_pba +#define vnic_intr_free fnic_intr_free +#define vnic_intr_alloc fnic_intr_alloc +#define vnic_intr_init fnic_intr_init +#define vnic_intr_clean fnic_intr_clean + +#define VNIC_INTR_TIMER_MAX 0xffff + +#define VNIC_INTR_TIMER_TYPE_ABS 0 +#define VNIC_INTR_TIMER_TYPE_QUIET 1 + +/* Interrupt control */ +struct vnic_intr_ctrl { + u32 coalescing_timer; /* 0x00 */ + u32 pad0; + u32 coalescing_value; /* 0x08 */ + u32 pad1; + u32 coalescing_type; /* 0x10 */ + u32 pad2; + u32 mask_on_assertion; /* 0x18 */ + u32 pad3; + u32 mask; /* 0x20 */ + u32 pad4; + u32 int_credits; /* 0x28 */ + u32 pad5; + u32 int_credit_return; /* 0x30 */ + u32 pad6; +}; + +struct vnic_intr { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_intr_ctrl __iomem *ctrl; /* memory-mapped */ +}; + +static inline void vnic_intr_unmask(struct vnic_intr *intr) +{ + iowrite32(0, &intr->ctrl->mask); +} + +static inline void vnic_intr_mask(struct vnic_intr *intr) +{ + iowrite32(1, &intr->ctrl->mask); +} + +static inline void vnic_intr_return_credits(struct vnic_intr *intr, + unsigned int credits, int unmask, int reset_timer) +{ +#define VNIC_INTR_UNMASK_SHIFT 16 +#define VNIC_INTR_RESET_TIMER_SHIFT 17 + + u32 int_credit_return = (credits & 0xffff) | + (unmask ? (1 << VNIC_INTR_UNMASK_SHIFT) : 0) | + (reset_timer ? (1 << VNIC_INTR_RESET_TIMER_SHIFT) : 0); + + iowrite32(int_credit_return, &intr->ctrl->int_credit_return); +} + +static inline unsigned int vnic_intr_credits(struct vnic_intr *intr) +{ + return ioread32(&intr->ctrl->int_credits); +} + +static inline void vnic_intr_return_all_credits(struct vnic_intr *intr) +{ + unsigned int credits = vnic_intr_credits(intr); + int unmask = 1; + int reset_timer = 1; + + vnic_intr_return_credits(intr, credits, unmask, reset_timer); +} + +static inline u32 vnic_intr_legacy_pba(u32 __iomem *legacy_pba) +{ + /* read PBA without clearing */ + return ioread32(legacy_pba); +} + +void vnic_intr_free(struct vnic_intr *intr); +int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr, + unsigned int index); +void vnic_intr_init(struct vnic_intr *intr, unsigned int coalescing_timer, + unsigned int coalescing_type, unsigned int mask_on_assertion); +void vnic_intr_clean(struct vnic_intr *intr); + +#endif /* _VNIC_INTR_H_ */ diff --git a/drivers/scsi/fnic/vnic_nic.h b/drivers/scsi/fnic/vnic_nic.h new file mode 100644 index 000000000000..f15b83eeaced --- /dev/null +++ b/drivers/scsi/fnic/vnic_nic.h @@ -0,0 +1,69 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_NIC_H_ +#define _VNIC_NIC_H_ + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_set_nic_cfg fnic_set_nic_cfg + +#define NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_DEFAULT_CPU_SHIFT 0 +#define NIC_CFG_RSS_HASH_TYPE (0xffUL << 8) +#define NIC_CFG_RSS_HASH_TYPE_MASK_FIELD 0xffUL +#define NIC_CFG_RSS_HASH_TYPE_SHIFT 8 +#define NIC_CFG_RSS_HASH_BITS (7UL << 16) +#define NIC_CFG_RSS_HASH_BITS_MASK_FIELD 7UL +#define NIC_CFG_RSS_HASH_BITS_SHIFT 16 +#define NIC_CFG_RSS_BASE_CPU (7UL << 19) +#define NIC_CFG_RSS_BASE_CPU_MASK_FIELD 7UL +#define NIC_CFG_RSS_BASE_CPU_SHIFT 19 +#define NIC_CFG_RSS_ENABLE (1UL << 22) +#define NIC_CFG_RSS_ENABLE_MASK_FIELD 1UL +#define NIC_CFG_RSS_ENABLE_SHIFT 22 +#define NIC_CFG_TSO_IPID_SPLIT_EN (1UL << 23) +#define NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD 1UL +#define NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT 23 +#define NIC_CFG_IG_VLAN_STRIP_EN (1UL << 24) +#define NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD 1UL +#define NIC_CFG_IG_VLAN_STRIP_EN_SHIFT 24 + +static inline void vnic_set_nic_cfg(u32 *nic_cfg, + u8 rss_default_cpu, u8 rss_hash_type, + u8 rss_hash_bits, u8 rss_base_cpu, + u8 rss_enable, u8 tso_ipid_split_en, + u8 ig_vlan_strip_en) +{ + *nic_cfg = (rss_default_cpu & NIC_CFG_RSS_DEFAULT_CPU_MASK_FIELD) | + ((rss_hash_type & NIC_CFG_RSS_HASH_TYPE_MASK_FIELD) + << NIC_CFG_RSS_HASH_TYPE_SHIFT) | + ((rss_hash_bits & NIC_CFG_RSS_HASH_BITS_MASK_FIELD) + << NIC_CFG_RSS_HASH_BITS_SHIFT) | + ((rss_base_cpu & NIC_CFG_RSS_BASE_CPU_MASK_FIELD) + << NIC_CFG_RSS_BASE_CPU_SHIFT) | + ((rss_enable & NIC_CFG_RSS_ENABLE_MASK_FIELD) + << NIC_CFG_RSS_ENABLE_SHIFT) | + ((tso_ipid_split_en & NIC_CFG_TSO_IPID_SPLIT_EN_MASK_FIELD) + << NIC_CFG_TSO_IPID_SPLIT_EN_SHIFT) | + ((ig_vlan_strip_en & NIC_CFG_IG_VLAN_STRIP_EN_MASK_FIELD) + << NIC_CFG_IG_VLAN_STRIP_EN_SHIFT); +} + +#endif /* _VNIC_NIC_H_ */ diff --git a/drivers/scsi/fnic/vnic_resource.h b/drivers/scsi/fnic/vnic_resource.h new file mode 100644 index 000000000000..2d842f79d41a --- /dev/null +++ b/drivers/scsi/fnic/vnic_resource.h @@ -0,0 +1,61 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_RESOURCE_H_ +#define _VNIC_RESOURCE_H_ + +#define VNIC_RES_MAGIC 0x766E6963L /* 'vnic' */ +#define VNIC_RES_VERSION 0x00000000L + +/* vNIC resource types */ +enum vnic_res_type { + RES_TYPE_EOL, /* End-of-list */ + RES_TYPE_WQ, /* Work queues */ + RES_TYPE_RQ, /* Receive queues */ + RES_TYPE_CQ, /* Completion queues */ + RES_TYPE_RSVD1, + RES_TYPE_NIC_CFG, /* Enet NIC config registers */ + RES_TYPE_RSVD2, + RES_TYPE_RSVD3, + RES_TYPE_RSVD4, + RES_TYPE_RSVD5, + RES_TYPE_INTR_CTRL, /* Interrupt ctrl table */ + RES_TYPE_INTR_TABLE, /* MSI/MSI-X Interrupt table */ + RES_TYPE_INTR_PBA, /* MSI/MSI-X PBA table */ + RES_TYPE_INTR_PBA_LEGACY, /* Legacy intr status */ + RES_TYPE_RSVD6, + RES_TYPE_RSVD7, + RES_TYPE_DEVCMD, /* Device command region */ + RES_TYPE_PASS_THRU_PAGE, /* Pass-thru page */ + + RES_TYPE_MAX, /* Count of resource types */ +}; + +struct vnic_resource_header { + u32 magic; + u32 version; +}; + +struct vnic_resource { + u8 type; + u8 bar; + u8 pad[2]; + u32 bar_offset; + u32 count; +}; + +#endif /* _VNIC_RESOURCE_H_ */ diff --git a/drivers/scsi/fnic/vnic_rq.c b/drivers/scsi/fnic/vnic_rq.c new file mode 100644 index 000000000000..bedd0d285630 --- /dev/null +++ b/drivers/scsi/fnic/vnic_rq.c @@ -0,0 +1,196 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include "vnic_dev.h" +#include "vnic_rq.h" + +static int vnic_rq_alloc_bufs(struct vnic_rq *rq) +{ + struct vnic_rq_buf *buf; + struct vnic_dev *vdev; + unsigned int i, j, count = rq->ring.desc_count; + unsigned int blks = VNIC_RQ_BUF_BLKS_NEEDED(count); + + vdev = rq->vdev; + + for (i = 0; i < blks; i++) { + rq->bufs[i] = kzalloc(VNIC_RQ_BUF_BLK_SZ, GFP_ATOMIC); + if (!rq->bufs[i]) { + printk(KERN_ERR "Failed to alloc rq_bufs\n"); + return -ENOMEM; + } + } + + for (i = 0; i < blks; i++) { + buf = rq->bufs[i]; + for (j = 0; j < VNIC_RQ_BUF_BLK_ENTRIES; j++) { + buf->index = i * VNIC_RQ_BUF_BLK_ENTRIES + j; + buf->desc = (u8 *)rq->ring.descs + + rq->ring.desc_size * buf->index; + if (buf->index + 1 == count) { + buf->next = rq->bufs[0]; + break; + } else if (j + 1 == VNIC_RQ_BUF_BLK_ENTRIES) { + buf->next = rq->bufs[i + 1]; + } else { + buf->next = buf + 1; + buf++; + } + } + } + + rq->to_use = rq->to_clean = rq->bufs[0]; + rq->buf_index = 0; + + return 0; +} + +void vnic_rq_free(struct vnic_rq *rq) +{ + struct vnic_dev *vdev; + unsigned int i; + + vdev = rq->vdev; + + vnic_dev_free_desc_ring(vdev, &rq->ring); + + for (i = 0; i < VNIC_RQ_BUF_BLKS_MAX; i++) { + kfree(rq->bufs[i]); + rq->bufs[i] = NULL; + } + + rq->ctrl = NULL; +} + +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + rq->index = index; + rq->vdev = vdev; + + rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index); + if (!rq->ctrl) { + printk(KERN_ERR "Failed to hook RQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_rq_disable(rq); + + err = vnic_dev_alloc_desc_ring(vdev, &rq->ring, desc_count, desc_size); + if (err) + return err; + + err = vnic_rq_alloc_bufs(rq); + if (err) { + vnic_rq_free(rq); + return err; + } + + return 0; +} + +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + u32 fetch_index; + + paddr = (u64)rq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &rq->ctrl->ring_base); + iowrite32(rq->ring.desc_count, &rq->ctrl->ring_size); + iowrite32(cq_index, &rq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &rq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &rq->ctrl->error_interrupt_offset); + iowrite32(0, &rq->ctrl->dropped_packet_count); + iowrite32(0, &rq->ctrl->error_status); + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + rq->to_use = rq->to_clean = + &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES] + [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES]; + iowrite32(fetch_index, &rq->ctrl->posted_index); + + rq->buf_index = 0; +} + +unsigned int vnic_rq_error_status(struct vnic_rq *rq) +{ + return ioread32(&rq->ctrl->error_status); +} + +void vnic_rq_enable(struct vnic_rq *rq) +{ + iowrite32(1, &rq->ctrl->enable); +} + +int vnic_rq_disable(struct vnic_rq *rq) +{ + unsigned int wait; + + iowrite32(0, &rq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&rq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable RQ[%d]\n", rq->index); + + return -ETIMEDOUT; +} + +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)) +{ + struct vnic_rq_buf *buf; + u32 fetch_index; + + BUG_ON(ioread32(&rq->ctrl->enable)); + + buf = rq->to_clean; + + while (vnic_rq_desc_used(rq) > 0) { + + (*buf_clean)(rq, buf); + + buf = rq->to_clean = buf->next; + rq->ring.desc_avail++; + } + + /* Use current fetch_index as the ring starting point */ + fetch_index = ioread32(&rq->ctrl->fetch_index); + rq->to_use = rq->to_clean = + &rq->bufs[fetch_index / VNIC_RQ_BUF_BLK_ENTRIES] + [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES]; + iowrite32(fetch_index, &rq->ctrl->posted_index); + + rq->buf_index = 0; + + vnic_dev_clear_desc_ring(&rq->ring); +} + diff --git a/drivers/scsi/fnic/vnic_rq.h b/drivers/scsi/fnic/vnic_rq.h new file mode 100644 index 000000000000..aebdfbd6ad3c --- /dev/null +++ b/drivers/scsi/fnic/vnic_rq.h @@ -0,0 +1,235 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_RQ_H_ +#define _VNIC_RQ_H_ + +#include <linux/pci.h> +#include "vnic_dev.h" +#include "vnic_cq.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_rq_desc_avail fnic_rq_desc_avail +#define vnic_rq_desc_used fnic_rq_desc_used +#define vnic_rq_next_desc fnic_rq_next_desc +#define vnic_rq_next_index fnic_rq_next_index +#define vnic_rq_next_buf_index fnic_rq_next_buf_index +#define vnic_rq_post fnic_rq_post +#define vnic_rq_posting_soon fnic_rq_posting_soon +#define vnic_rq_return_descs fnic_rq_return_descs +#define vnic_rq_service fnic_rq_service +#define vnic_rq_fill fnic_rq_fill +#define vnic_rq_free fnic_rq_free +#define vnic_rq_alloc fnic_rq_alloc +#define vnic_rq_init fnic_rq_init +#define vnic_rq_error_status fnic_rq_error_status +#define vnic_rq_enable fnic_rq_enable +#define vnic_rq_disable fnic_rq_disable +#define vnic_rq_clean fnic_rq_clean + +/* Receive queue control */ +struct vnic_rq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 posted_index; /* 0x10 */ + u32 pad1; + u32 cq_index; /* 0x18 */ + u32 pad2; + u32 enable; /* 0x20 */ + u32 pad3; + u32 running; /* 0x28 */ + u32 pad4; + u32 fetch_index; /* 0x30 */ + u32 pad5; + u32 error_interrupt_enable; /* 0x38 */ + u32 pad6; + u32 error_interrupt_offset; /* 0x40 */ + u32 pad7; + u32 error_status; /* 0x48 */ + u32 pad8; + u32 dropped_packet_count; /* 0x50 */ + u32 pad9; + u32 dropped_packet_count_rc; /* 0x58 */ + u32 pad10; +}; + +/* Break the vnic_rq_buf allocations into blocks of 64 entries */ +#define VNIC_RQ_BUF_BLK_ENTRIES 64 +#define VNIC_RQ_BUF_BLK_SZ \ + (VNIC_RQ_BUF_BLK_ENTRIES * sizeof(struct vnic_rq_buf)) +#define VNIC_RQ_BUF_BLKS_NEEDED(entries) \ + DIV_ROUND_UP(entries, VNIC_RQ_BUF_BLK_ENTRIES) +#define VNIC_RQ_BUF_BLKS_MAX VNIC_RQ_BUF_BLKS_NEEDED(4096) + +struct vnic_rq_buf { + struct vnic_rq_buf *next; + dma_addr_t dma_addr; + void *os_buf; + unsigned int os_buf_index; + unsigned int len; + unsigned int index; + void *desc; +}; + +struct vnic_rq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_rq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct vnic_rq_buf *bufs[VNIC_RQ_BUF_BLKS_MAX]; + struct vnic_rq_buf *to_use; + struct vnic_rq_buf *to_clean; + void *os_buf_head; + unsigned int buf_index; + unsigned int pkts_outstanding; +}; + +static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) +{ + /* how many does SW own? */ + return rq->ring.desc_avail; +} + +static inline unsigned int vnic_rq_desc_used(struct vnic_rq *rq) +{ + /* how many does HW own? */ + return rq->ring.desc_count - rq->ring.desc_avail - 1; +} + +static inline void *vnic_rq_next_desc(struct vnic_rq *rq) +{ + return rq->to_use->desc; +} + +static inline unsigned int vnic_rq_next_index(struct vnic_rq *rq) +{ + return rq->to_use->index; +} + +static inline unsigned int vnic_rq_next_buf_index(struct vnic_rq *rq) +{ + return rq->buf_index++; +} + +static inline void vnic_rq_post(struct vnic_rq *rq, + void *os_buf, unsigned int os_buf_index, + dma_addr_t dma_addr, unsigned int len) +{ + struct vnic_rq_buf *buf = rq->to_use; + + buf->os_buf = os_buf; + buf->os_buf_index = os_buf_index; + buf->dma_addr = dma_addr; + buf->len = len; + + buf = buf->next; + rq->to_use = buf; + rq->ring.desc_avail--; + + /* Move the posted_index every nth descriptor + */ + +#ifndef VNIC_RQ_RETURN_RATE +#define VNIC_RQ_RETURN_RATE 0xf /* keep 2^n - 1 */ +#endif + + if ((buf->index & VNIC_RQ_RETURN_RATE) == 0) { + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + iowrite32(buf->index, &rq->ctrl->posted_index); + } +} + +static inline int vnic_rq_posting_soon(struct vnic_rq *rq) +{ + return (rq->to_use->index & VNIC_RQ_RETURN_RATE) == 0; +} + +static inline void vnic_rq_return_descs(struct vnic_rq *rq, unsigned int count) +{ + rq->ring.desc_avail += count; +} + +enum desc_return_options { + VNIC_RQ_RETURN_DESC, + VNIC_RQ_DEFER_RETURN_DESC, +}; + +static inline void vnic_rq_service(struct vnic_rq *rq, + struct cq_desc *cq_desc, u16 completed_index, + int desc_return, void (*buf_service)(struct vnic_rq *rq, + struct cq_desc *cq_desc, struct vnic_rq_buf *buf, + int skipped, void *opaque), void *opaque) +{ + struct vnic_rq_buf *buf; + int skipped; + + buf = rq->to_clean; + while (1) { + + skipped = (buf->index != completed_index); + + (*buf_service)(rq, cq_desc, buf, skipped, opaque); + + if (desc_return == VNIC_RQ_RETURN_DESC) + rq->ring.desc_avail++; + + rq->to_clean = buf->next; + + if (!skipped) + break; + + buf = rq->to_clean; + } +} + +static inline int vnic_rq_fill(struct vnic_rq *rq, + int (*buf_fill)(struct vnic_rq *rq)) +{ + int err; + + while (vnic_rq_desc_avail(rq) > 1) { + + err = (*buf_fill)(rq); + if (err) + return err; + } + + return 0; +} + +void vnic_rq_free(struct vnic_rq *rq); +int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +unsigned int vnic_rq_error_status(struct vnic_rq *rq); +void vnic_rq_enable(struct vnic_rq *rq); +int vnic_rq_disable(struct vnic_rq *rq); +void vnic_rq_clean(struct vnic_rq *rq, + void (*buf_clean)(struct vnic_rq *rq, struct vnic_rq_buf *buf)); + +#endif /* _VNIC_RQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_scsi.h b/drivers/scsi/fnic/vnic_scsi.h new file mode 100644 index 000000000000..46baa5254001 --- /dev/null +++ b/drivers/scsi/fnic/vnic_scsi.h @@ -0,0 +1,99 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_SCSI_H_ +#define _VNIC_SCSI_H_ + +#define VNIC_FNIC_WQ_COPY_COUNT_MIN 1 +#define VNIC_FNIC_WQ_COPY_COUNT_MAX 1 + +#define VNIC_FNIC_WQ_DESCS_MIN 64 +#define VNIC_FNIC_WQ_DESCS_MAX 128 + +#define VNIC_FNIC_WQ_COPY_DESCS_MIN 64 +#define VNIC_FNIC_WQ_COPY_DESCS_MAX 512 + +#define VNIC_FNIC_RQ_DESCS_MIN 64 +#define VNIC_FNIC_RQ_DESCS_MAX 128 + +#define VNIC_FNIC_EDTOV_MIN 1000 +#define VNIC_FNIC_EDTOV_MAX 255000 +#define VNIC_FNIC_EDTOV_DEF 2000 + +#define VNIC_FNIC_RATOV_MIN 1000 +#define VNIC_FNIC_RATOV_MAX 255000 + +#define VNIC_FNIC_MAXDATAFIELDSIZE_MIN 256 +#define VNIC_FNIC_MAXDATAFIELDSIZE_MAX 2112 + +#define VNIC_FNIC_FLOGI_RETRIES_MIN 0 +#define VNIC_FNIC_FLOGI_RETRIES_MAX 0xffffffff +#define VNIC_FNIC_FLOGI_RETRIES_DEF 0xffffffff + +#define VNIC_FNIC_FLOGI_TIMEOUT_MIN 1000 +#define VNIC_FNIC_FLOGI_TIMEOUT_MAX 255000 + +#define VNIC_FNIC_PLOGI_RETRIES_MIN 0 +#define VNIC_FNIC_PLOGI_RETRIES_MAX 255 +#define VNIC_FNIC_PLOGI_RETRIES_DEF 8 + +#define VNIC_FNIC_PLOGI_TIMEOUT_MIN 1000 +#define VNIC_FNIC_PLOGI_TIMEOUT_MAX 255000 + +#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN 256 +#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX 4096 + +#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN 0 +#define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX 240000 + +#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MIN 0 +#define VNIC_FNIC_PORT_DOWN_TIMEOUT_MAX 240000 + +#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MIN 0 +#define VNIC_FNIC_PORT_DOWN_IO_RETRIES_MAX 255 + +#define VNIC_FNIC_LUNS_PER_TARGET_MIN 1 +#define VNIC_FNIC_LUNS_PER_TARGET_MAX 1024 + +/* Device-specific region: scsi configuration */ +struct vnic_fc_config { + u64 node_wwn; + u64 port_wwn; + u32 flags; + u32 wq_enet_desc_count; + u32 wq_copy_desc_count; + u32 rq_desc_count; + u32 flogi_retries; + u32 flogi_timeout; + u32 plogi_retries; + u32 plogi_timeout; + u32 io_throttle_count; + u32 link_down_timeout; + u32 port_down_timeout; + u32 port_down_io_retries; + u32 luns_per_tgt; + u16 maxdatafieldsize; + u16 ed_tov; + u16 ra_tov; + u16 intr_timer; + u8 intr_timer_type; +}; + +#define VFCF_FCP_SEQ_LVL_ERR 0x1 /* Enable FCP-2 Error Recovery */ +#define VFCF_PERBI 0x2 /* persistent binding info available */ + +#endif /* _VNIC_SCSI_H_ */ diff --git a/drivers/scsi/fnic/vnic_stats.h b/drivers/scsi/fnic/vnic_stats.h new file mode 100644 index 000000000000..5372e23c1cb3 --- /dev/null +++ b/drivers/scsi/fnic/vnic_stats.h @@ -0,0 +1,68 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_STATS_H_ +#define _VNIC_STATS_H_ + +/* Tx statistics */ +struct vnic_tx_stats { + u64 tx_frames_ok; + u64 tx_unicast_frames_ok; + u64 tx_multicast_frames_ok; + u64 tx_broadcast_frames_ok; + u64 tx_bytes_ok; + u64 tx_unicast_bytes_ok; + u64 tx_multicast_bytes_ok; + u64 tx_broadcast_bytes_ok; + u64 tx_drops; + u64 tx_errors; + u64 tx_tso; + u64 rsvd[16]; +}; + +/* Rx statistics */ +struct vnic_rx_stats { + u64 rx_frames_ok; + u64 rx_frames_total; + u64 rx_unicast_frames_ok; + u64 rx_multicast_frames_ok; + u64 rx_broadcast_frames_ok; + u64 rx_bytes_ok; + u64 rx_unicast_bytes_ok; + u64 rx_multicast_bytes_ok; + u64 rx_broadcast_bytes_ok; + u64 rx_drop; + u64 rx_no_bufs; + u64 rx_errors; + u64 rx_rss; + u64 rx_crc_errors; + u64 rx_frames_64; + u64 rx_frames_127; + u64 rx_frames_255; + u64 rx_frames_511; + u64 rx_frames_1023; + u64 rx_frames_1518; + u64 rx_frames_to_max; + u64 rsvd[16]; +}; + +struct vnic_stats { + struct vnic_tx_stats tx; + struct vnic_rx_stats rx; +}; + +#endif /* _VNIC_STATS_H_ */ diff --git a/drivers/scsi/fnic/vnic_wq.c b/drivers/scsi/fnic/vnic_wq.c new file mode 100644 index 000000000000..1f9ea790d130 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq.c @@ -0,0 +1,182 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include "vnic_dev.h" +#include "vnic_wq.h" + +static int vnic_wq_alloc_bufs(struct vnic_wq *wq) +{ + struct vnic_wq_buf *buf; + struct vnic_dev *vdev; + unsigned int i, j, count = wq->ring.desc_count; + unsigned int blks = VNIC_WQ_BUF_BLKS_NEEDED(count); + + vdev = wq->vdev; + + for (i = 0; i < blks; i++) { + wq->bufs[i] = kzalloc(VNIC_WQ_BUF_BLK_SZ, GFP_ATOMIC); + if (!wq->bufs[i]) { + printk(KERN_ERR "Failed to alloc wq_bufs\n"); + return -ENOMEM; + } + } + + for (i = 0; i < blks; i++) { + buf = wq->bufs[i]; + for (j = 0; j < VNIC_WQ_BUF_BLK_ENTRIES; j++) { + buf->index = i * VNIC_WQ_BUF_BLK_ENTRIES + j; + buf->desc = (u8 *)wq->ring.descs + + wq->ring.desc_size * buf->index; + if (buf->index + 1 == count) { + buf->next = wq->bufs[0]; + break; + } else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES) { + buf->next = wq->bufs[i + 1]; + } else { + buf->next = buf + 1; + buf++; + } + } + } + + wq->to_use = wq->to_clean = wq->bufs[0]; + + return 0; +} + +void vnic_wq_free(struct vnic_wq *wq) +{ + struct vnic_dev *vdev; + unsigned int i; + + vdev = wq->vdev; + + vnic_dev_free_desc_ring(vdev, &wq->ring); + + for (i = 0; i < VNIC_WQ_BUF_BLKS_MAX; i++) { + kfree(wq->bufs[i]); + wq->bufs[i] = NULL; + } + + wq->ctrl = NULL; + +} + +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size) +{ + int err; + + wq->index = index; + wq->vdev = vdev; + + wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index); + if (!wq->ctrl) { + printk(KERN_ERR "Failed to hook WQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_wq_disable(wq); + + err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); + if (err) + return err; + + err = vnic_wq_alloc_bufs(wq); + if (err) { + vnic_wq_free(wq); + return err; + } + + return 0; +} + +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + + paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &wq->ctrl->ring_base); + iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size); + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(cq_index, &wq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); + iowrite32(0, &wq->ctrl->error_status); +} + +unsigned int vnic_wq_error_status(struct vnic_wq *wq) +{ + return ioread32(&wq->ctrl->error_status); +} + +void vnic_wq_enable(struct vnic_wq *wq) +{ + iowrite32(1, &wq->ctrl->enable); +} + +int vnic_wq_disable(struct vnic_wq *wq) +{ + unsigned int wait; + + iowrite32(0, &wq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&wq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable WQ[%d]\n", wq->index); + + return -ETIMEDOUT; +} + +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)) +{ + struct vnic_wq_buf *buf; + + BUG_ON(ioread32(&wq->ctrl->enable)); + + buf = wq->to_clean; + + while (vnic_wq_desc_used(wq) > 0) { + + (*buf_clean)(wq, buf); + + buf = wq->to_clean = buf->next; + wq->ring.desc_avail++; + } + + wq->to_use = wq->to_clean = wq->bufs[0]; + + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(0, &wq->ctrl->error_status); + + vnic_dev_clear_desc_ring(&wq->ring); +} diff --git a/drivers/scsi/fnic/vnic_wq.h b/drivers/scsi/fnic/vnic_wq.h new file mode 100644 index 000000000000..5cd094f79281 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq.h @@ -0,0 +1,175 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_WQ_H_ +#define _VNIC_WQ_H_ + +#include <linux/pci.h> +#include "vnic_dev.h" +#include "vnic_cq.h" + +/* + * These defines avoid symbol clash between fnic and enic (Cisco 10G Eth + * Driver) when both are built with CONFIG options =y + */ +#define vnic_wq_desc_avail fnic_wq_desc_avail +#define vnic_wq_desc_used fnic_wq_desc_used +#define vnic_wq_next_desc fni_cwq_next_desc +#define vnic_wq_post fnic_wq_post +#define vnic_wq_service fnic_wq_service +#define vnic_wq_free fnic_wq_free +#define vnic_wq_alloc fnic_wq_alloc +#define vnic_wq_init fnic_wq_init +#define vnic_wq_error_status fnic_wq_error_status +#define vnic_wq_enable fnic_wq_enable +#define vnic_wq_disable fnic_wq_disable +#define vnic_wq_clean fnic_wq_clean + +/* Work queue control */ +struct vnic_wq_ctrl { + u64 ring_base; /* 0x00 */ + u32 ring_size; /* 0x08 */ + u32 pad0; + u32 posted_index; /* 0x10 */ + u32 pad1; + u32 cq_index; /* 0x18 */ + u32 pad2; + u32 enable; /* 0x20 */ + u32 pad3; + u32 running; /* 0x28 */ + u32 pad4; + u32 fetch_index; /* 0x30 */ + u32 pad5; + u32 dca_value; /* 0x38 */ + u32 pad6; + u32 error_interrupt_enable; /* 0x40 */ + u32 pad7; + u32 error_interrupt_offset; /* 0x48 */ + u32 pad8; + u32 error_status; /* 0x50 */ + u32 pad9; +}; + +struct vnic_wq_buf { + struct vnic_wq_buf *next; + dma_addr_t dma_addr; + void *os_buf; + unsigned int len; + unsigned int index; + int sop; + void *desc; +}; + +/* Break the vnic_wq_buf allocations into blocks of 64 entries */ +#define VNIC_WQ_BUF_BLK_ENTRIES 64 +#define VNIC_WQ_BUF_BLK_SZ \ + (VNIC_WQ_BUF_BLK_ENTRIES * sizeof(struct vnic_wq_buf)) +#define VNIC_WQ_BUF_BLKS_NEEDED(entries) \ + DIV_ROUND_UP(entries, VNIC_WQ_BUF_BLK_ENTRIES) +#define VNIC_WQ_BUF_BLKS_MAX VNIC_WQ_BUF_BLKS_NEEDED(4096) + +struct vnic_wq { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + struct vnic_wq_buf *bufs[VNIC_WQ_BUF_BLKS_MAX]; + struct vnic_wq_buf *to_use; + struct vnic_wq_buf *to_clean; + unsigned int pkts_outstanding; +}; + +static inline unsigned int vnic_wq_desc_avail(struct vnic_wq *wq) +{ + /* how many does SW own? */ + return wq->ring.desc_avail; +} + +static inline unsigned int vnic_wq_desc_used(struct vnic_wq *wq) +{ + /* how many does HW own? */ + return wq->ring.desc_count - wq->ring.desc_avail - 1; +} + +static inline void *vnic_wq_next_desc(struct vnic_wq *wq) +{ + return wq->to_use->desc; +} + +static inline void vnic_wq_post(struct vnic_wq *wq, + void *os_buf, dma_addr_t dma_addr, + unsigned int len, int sop, int eop) +{ + struct vnic_wq_buf *buf = wq->to_use; + + buf->sop = sop; + buf->os_buf = eop ? os_buf : NULL; + buf->dma_addr = dma_addr; + buf->len = len; + + buf = buf->next; + if (eop) { + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + iowrite32(buf->index, &wq->ctrl->posted_index); + } + wq->to_use = buf; + + wq->ring.desc_avail--; +} + +static inline void vnic_wq_service(struct vnic_wq *wq, + struct cq_desc *cq_desc, u16 completed_index, + void (*buf_service)(struct vnic_wq *wq, + struct cq_desc *cq_desc, struct vnic_wq_buf *buf, void *opaque), + void *opaque) +{ + struct vnic_wq_buf *buf; + + buf = wq->to_clean; + while (1) { + + (*buf_service)(wq, cq_desc, buf, opaque); + + wq->ring.desc_avail++; + + wq->to_clean = buf->next; + + if (buf->index == completed_index) + break; + + buf = wq->to_clean; + } +} + +void vnic_wq_free(struct vnic_wq *wq); +int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index, + unsigned int desc_count, unsigned int desc_size); +void vnic_wq_init(struct vnic_wq *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +unsigned int vnic_wq_error_status(struct vnic_wq *wq); +void vnic_wq_enable(struct vnic_wq *wq); +int vnic_wq_disable(struct vnic_wq *wq); +void vnic_wq_clean(struct vnic_wq *wq, + void (*buf_clean)(struct vnic_wq *wq, struct vnic_wq_buf *buf)); + +#endif /* _VNIC_WQ_H_ */ diff --git a/drivers/scsi/fnic/vnic_wq_copy.c b/drivers/scsi/fnic/vnic_wq_copy.c new file mode 100644 index 000000000000..9eab7e7caf38 --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq_copy.c @@ -0,0 +1,117 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include "vnic_wq_copy.h" + +void vnic_wq_copy_enable(struct vnic_wq_copy *wq) +{ + iowrite32(1, &wq->ctrl->enable); +} + +int vnic_wq_copy_disable(struct vnic_wq_copy *wq) +{ + unsigned int wait; + + iowrite32(0, &wq->ctrl->enable); + + /* Wait for HW to ACK disable request */ + for (wait = 0; wait < 100; wait++) { + if (!(ioread32(&wq->ctrl->running))) + return 0; + udelay(1); + } + + printk(KERN_ERR "Failed to disable Copy WQ[%d]," + " fetch index=%d, posted_index=%d\n", + wq->index, ioread32(&wq->ctrl->fetch_index), + ioread32(&wq->ctrl->posted_index)); + + return -ENODEV; +} + +void vnic_wq_copy_clean(struct vnic_wq_copy *wq, + void (*q_clean)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)) +{ + BUG_ON(ioread32(&wq->ctrl->enable)); + + if (vnic_wq_copy_desc_in_use(wq)) + vnic_wq_copy_service(wq, -1, q_clean); + + wq->to_use_index = wq->to_clean_index = 0; + + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(0, &wq->ctrl->error_status); + + vnic_dev_clear_desc_ring(&wq->ring); +} + +void vnic_wq_copy_free(struct vnic_wq_copy *wq) +{ + struct vnic_dev *vdev; + + vdev = wq->vdev; + vnic_dev_free_desc_ring(vdev, &wq->ring); + wq->ctrl = NULL; +} + +int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq, + unsigned int index, unsigned int desc_count, + unsigned int desc_size) +{ + int err; + + wq->index = index; + wq->vdev = vdev; + wq->to_use_index = wq->to_clean_index = 0; + wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index); + if (!wq->ctrl) { + printk(KERN_ERR "Failed to hook COPY WQ[%d] resource\n", index); + return -EINVAL; + } + + vnic_wq_copy_disable(wq); + + err = vnic_dev_alloc_desc_ring(vdev, &wq->ring, desc_count, desc_size); + if (err) + return err; + + return 0; +} + +void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset) +{ + u64 paddr; + + paddr = (u64)wq->ring.base_addr | VNIC_PADDR_TARGET; + writeq(paddr, &wq->ctrl->ring_base); + iowrite32(wq->ring.desc_count, &wq->ctrl->ring_size); + iowrite32(0, &wq->ctrl->fetch_index); + iowrite32(0, &wq->ctrl->posted_index); + iowrite32(cq_index, &wq->ctrl->cq_index); + iowrite32(error_interrupt_enable, &wq->ctrl->error_interrupt_enable); + iowrite32(error_interrupt_offset, &wq->ctrl->error_interrupt_offset); +} + diff --git a/drivers/scsi/fnic/vnic_wq_copy.h b/drivers/scsi/fnic/vnic_wq_copy.h new file mode 100644 index 000000000000..6aff9740c3df --- /dev/null +++ b/drivers/scsi/fnic/vnic_wq_copy.h @@ -0,0 +1,128 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _VNIC_WQ_COPY_H_ +#define _VNIC_WQ_COPY_H_ + +#include <linux/pci.h> +#include "vnic_wq.h" +#include "fcpio.h" + +#define VNIC_WQ_COPY_MAX 1 + +struct vnic_wq_copy { + unsigned int index; + struct vnic_dev *vdev; + struct vnic_wq_ctrl __iomem *ctrl; /* memory-mapped */ + struct vnic_dev_ring ring; + unsigned to_use_index; + unsigned to_clean_index; +}; + +static inline unsigned int vnic_wq_copy_desc_avail(struct vnic_wq_copy *wq) +{ + return wq->ring.desc_avail; +} + +static inline unsigned int vnic_wq_copy_desc_in_use(struct vnic_wq_copy *wq) +{ + return wq->ring.desc_count - 1 - wq->ring.desc_avail; +} + +static inline void *vnic_wq_copy_next_desc(struct vnic_wq_copy *wq) +{ + struct fcpio_host_req *desc = wq->ring.descs; + return &desc[wq->to_use_index]; +} + +static inline void vnic_wq_copy_post(struct vnic_wq_copy *wq) +{ + + ((wq->to_use_index + 1) == wq->ring.desc_count) ? + (wq->to_use_index = 0) : (wq->to_use_index++); + wq->ring.desc_avail--; + + /* Adding write memory barrier prevents compiler and/or CPU + * reordering, thus avoiding descriptor posting before + * descriptor is initialized. Otherwise, hardware can read + * stale descriptor fields. + */ + wmb(); + + iowrite32(wq->to_use_index, &wq->ctrl->posted_index); +} + +static inline void vnic_wq_copy_desc_process(struct vnic_wq_copy *wq, u16 index) +{ + unsigned int cnt; + + if (wq->to_clean_index <= index) + cnt = (index - wq->to_clean_index) + 1; + else + cnt = wq->ring.desc_count - wq->to_clean_index + index + 1; + + wq->to_clean_index = ((index + 1) % wq->ring.desc_count); + wq->ring.desc_avail += cnt; + +} + +static inline void vnic_wq_copy_service(struct vnic_wq_copy *wq, + u16 completed_index, + void (*q_service)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)) +{ + struct fcpio_host_req *wq_desc = wq->ring.descs; + unsigned int curr_index; + + while (1) { + + if (q_service) + (*q_service)(wq, &wq_desc[wq->to_clean_index]); + + wq->ring.desc_avail++; + + curr_index = wq->to_clean_index; + + /* increment the to-clean index so that we start + * with an unprocessed index next time we enter the loop + */ + ((wq->to_clean_index + 1) == wq->ring.desc_count) ? + (wq->to_clean_index = 0) : (wq->to_clean_index++); + + if (curr_index == completed_index) + break; + + /* we have cleaned all the entries */ + if ((completed_index == (u16)-1) && + (wq->to_clean_index == wq->to_use_index)) + break; + } +} + +void vnic_wq_copy_enable(struct vnic_wq_copy *wq); +int vnic_wq_copy_disable(struct vnic_wq_copy *wq); +void vnic_wq_copy_free(struct vnic_wq_copy *wq); +int vnic_wq_copy_alloc(struct vnic_dev *vdev, struct vnic_wq_copy *wq, + unsigned int index, unsigned int desc_count, unsigned int desc_size); +void vnic_wq_copy_init(struct vnic_wq_copy *wq, unsigned int cq_index, + unsigned int error_interrupt_enable, + unsigned int error_interrupt_offset); +void vnic_wq_copy_clean(struct vnic_wq_copy *wq, + void (*q_clean)(struct vnic_wq_copy *wq, + struct fcpio_host_req *wq_desc)); + +#endif /* _VNIC_WQ_COPY_H_ */ diff --git a/drivers/scsi/fnic/wq_enet_desc.h b/drivers/scsi/fnic/wq_enet_desc.h new file mode 100644 index 000000000000..b121cbad18b8 --- /dev/null +++ b/drivers/scsi/fnic/wq_enet_desc.h @@ -0,0 +1,96 @@ +/* + * Copyright 2008 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _WQ_ENET_DESC_H_ +#define _WQ_ENET_DESC_H_ + +/* Ethernet work queue descriptor: 16B */ +struct wq_enet_desc { + __le64 address; + __le16 length; + __le16 mss_loopback; + __le16 header_length_flags; + __le16 vlan_tag; +}; + +#define WQ_ENET_ADDR_BITS 64 +#define WQ_ENET_LEN_BITS 14 +#define WQ_ENET_LEN_MASK ((1 << WQ_ENET_LEN_BITS) - 1) +#define WQ_ENET_MSS_BITS 14 +#define WQ_ENET_MSS_MASK ((1 << WQ_ENET_MSS_BITS) - 1) +#define WQ_ENET_MSS_SHIFT 2 +#define WQ_ENET_LOOPBACK_SHIFT 1 +#define WQ_ENET_HDRLEN_BITS 10 +#define WQ_ENET_HDRLEN_MASK ((1 << WQ_ENET_HDRLEN_BITS) - 1) +#define WQ_ENET_FLAGS_OM_BITS 2 +#define WQ_ENET_FLAGS_OM_MASK ((1 << WQ_ENET_FLAGS_OM_BITS) - 1) +#define WQ_ENET_FLAGS_EOP_SHIFT 12 +#define WQ_ENET_FLAGS_CQ_ENTRY_SHIFT 13 +#define WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT 14 +#define WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT 15 + +#define WQ_ENET_OFFLOAD_MODE_CSUM 0 +#define WQ_ENET_OFFLOAD_MODE_RESERVED 1 +#define WQ_ENET_OFFLOAD_MODE_CSUM_L4 2 +#define WQ_ENET_OFFLOAD_MODE_TSO 3 + +static inline void wq_enet_desc_enc(struct wq_enet_desc *desc, + u64 address, u16 length, u16 mss, u16 header_length, + u8 offload_mode, u8 eop, u8 cq_entry, u8 fcoe_encap, + u8 vlan_tag_insert, u16 vlan_tag, u8 loopback) +{ + desc->address = cpu_to_le64(address); + desc->length = cpu_to_le16(length & WQ_ENET_LEN_MASK); + desc->mss_loopback = cpu_to_le16((mss & WQ_ENET_MSS_MASK) << + WQ_ENET_MSS_SHIFT | (loopback & 1) << WQ_ENET_LOOPBACK_SHIFT); + desc->header_length_flags = cpu_to_le16( + (header_length & WQ_ENET_HDRLEN_MASK) | + (offload_mode & WQ_ENET_FLAGS_OM_MASK) << WQ_ENET_HDRLEN_BITS | + (eop & 1) << WQ_ENET_FLAGS_EOP_SHIFT | + (cq_entry & 1) << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT | + (fcoe_encap & 1) << WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT | + (vlan_tag_insert & 1) << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT); + desc->vlan_tag = cpu_to_le16(vlan_tag); +} + +static inline void wq_enet_desc_dec(struct wq_enet_desc *desc, + u64 *address, u16 *length, u16 *mss, u16 *header_length, + u8 *offload_mode, u8 *eop, u8 *cq_entry, u8 *fcoe_encap, + u8 *vlan_tag_insert, u16 *vlan_tag, u8 *loopback) +{ + *address = le64_to_cpu(desc->address); + *length = le16_to_cpu(desc->length) & WQ_ENET_LEN_MASK; + *mss = (le16_to_cpu(desc->mss_loopback) >> WQ_ENET_MSS_SHIFT) & + WQ_ENET_MSS_MASK; + *loopback = (u8)((le16_to_cpu(desc->mss_loopback) >> + WQ_ENET_LOOPBACK_SHIFT) & 1); + *header_length = le16_to_cpu(desc->header_length_flags) & + WQ_ENET_HDRLEN_MASK; + *offload_mode = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_HDRLEN_BITS) & WQ_ENET_FLAGS_OM_MASK); + *eop = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_EOP_SHIFT) & 1); + *cq_entry = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_CQ_ENTRY_SHIFT) & 1); + *fcoe_encap = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_FCOE_ENCAP_SHIFT) & 1); + *vlan_tag_insert = (u8)((le16_to_cpu(desc->header_length_flags) >> + WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT) & 1); + *vlan_tag = le16_to_cpu(desc->vlan_tag); +} + +#endif /* _WQ_ENET_DESC_H_ */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index babd4cc0cb25..36b1d1052ba1 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -69,7 +69,7 @@ #define MPT2SAS_AUTHOR "LSI Corporation <DL-MPTFusionLinux@lsi.com>" #define MPT2SAS_DESCRIPTION "LSI MPT Fusion SAS 2.0 Device Driver" #define MPT2SAS_DRIVER_VERSION "01.100.02.00" -#define MPT2SAS_MAJOR_VERSION 00 +#define MPT2SAS_MAJOR_VERSION 01 #define MPT2SAS_MINOR_VERSION 100 #define MPT2SAS_BUILD_VERSION 02 #define MPT2SAS_RELEASE_VERSION 00 diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 6f51ca485f35..e2b50d8f57a8 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -425,6 +425,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, INIT_LIST_HEAD(&starget->devices); starget->state = STARGET_CREATED; starget->scsi_level = SCSI_2; + starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED; retry: spin_lock_irqsave(shost->host_lock, flags); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 094795455293..0a2ce7b6325c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -357,7 +357,7 @@ int iscsi_session_chkready(struct iscsi_cls_session *session) err = 0; break; case ISCSI_SESSION_FAILED: - err = DID_TRANSPORT_DISRUPTED << 16; + err = DID_IMM_RETRY << 16; break; case ISCSI_SESSION_FREE: err = DID_TRANSPORT_FAILFAST << 16; diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index b4b39811b445..a0127e93ade0 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -137,6 +137,7 @@ struct uart_8250_port { unsigned char mcr; unsigned char mcr_mask; /* mask of user bits */ unsigned char mcr_force; /* mask of forced bits */ + unsigned char cur_iotype; /* Running I/O type */ /* * Some bits in registers are cleared on a read, so they must @@ -471,6 +472,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value) static void set_io_from_upio(struct uart_port *p) { + struct uart_8250_port *up = (struct uart_8250_port *)p; switch (p->iotype) { case UPIO_HUB6: p->serial_in = hub6_serial_in; @@ -509,6 +511,8 @@ static void set_io_from_upio(struct uart_port *p) p->serial_out = io_serial_out; break; } + /* Remember loaded iotype */ + up->cur_iotype = p->iotype; } static void @@ -1937,6 +1941,9 @@ static int serial8250_startup(struct uart_port *port) up->capabilities = uart_config[up->port.type].flags; up->mcr = 0; + if (up->port.iotype != up->cur_iotype) + set_io_from_upio(port); + if (up->port.type == PORT_16C950) { /* Wake up and initialize UART */ up->acr = 0; @@ -2563,6 +2570,9 @@ static void serial8250_config_port(struct uart_port *port, int flags) if (ret < 0) probeflags &= ~PROBE_RSA; + if (up->port.iotype != up->cur_iotype) + set_io_from_upio(port); + if (flags & UART_CONFIG_TYPE) autoconfig(up, probeflags); if (up->port.type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) @@ -2671,6 +2681,11 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) { int i; + for (i = 0; i < nr_uarts; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + up->cur_iotype = 0xFF; + } + serial8250_isa_init_ports(); for (i = 0; i < nr_uarts; i++) { diff --git a/drivers/serial/8250_gsc.c b/drivers/serial/8250_gsc.c index 418b4fe9a0a1..33149d982e82 100644 --- a/drivers/serial/8250_gsc.c +++ b/drivers/serial/8250_gsc.c @@ -39,9 +39,9 @@ static int __init serial_init_chip(struct parisc_device *dev) */ if (parisc_parent(dev)->id.hw_type != HPHW_IOA) printk(KERN_INFO - "Serial: device 0x%lx not configured.\n" + "Serial: device 0x%llx not configured.\n" "Enable support for Wax, Lasi, Asp or Dino.\n", - dev->hpa.start); + (unsigned long long)dev->hpa.start); return -ENODEV; } diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index e3a5ad5ef1d6..cdc049d4350f 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -665,7 +665,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl010_probe(struct amba_device *dev, void *id) +static int pl010_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 8b2b9700f3e4..88fdac51b6c5 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -729,7 +729,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl011_probe(struct amba_device *dev, void *id) +static int pl011_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index 6579e2be1dd1..a461b3b2c72d 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -1472,8 +1472,8 @@ static void icom_remove_adapter(struct icom_adapter *icom_adapter) free_irq(icom_adapter->pci_dev->irq, (void *) icom_adapter); iounmap(icom_adapter->base_addr); - icom_free_adapter(icom_adapter); pci_release_regions(icom_adapter->pci_dev); + icom_free_adapter(icom_adapter); } static void icom_kref_release(struct kref *kref) diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 9f460b175c50..5f0be40dfdab 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -1031,6 +1031,8 @@ imx_console_setup(struct console *co, char *options) if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports)) co->index = 0; sport = imx_ports[co->index]; + if(sport == NULL) + return -ENODEV; if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 7f72f8ceaa6f..b3feb6198d57 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -988,7 +988,7 @@ mpc52xx_console_setup(struct console *co, char *options) pr_debug("mpc52xx_console_setup co=%p, co->index=%i, options=%s\n", co, co->index, options); - if ((co->index < 0) || (co->index > MPC52xx_PSC_MAXNUM)) { + if ((co->index < 0) || (co->index >= MPC52xx_PSC_MAXNUM)) { pr_debug("PSC%x out of range\n", co->index); return -EINVAL; } diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c index 7dc3a6b41397..a0e0d246b592 100644 --- a/drivers/ssb/embedded.c +++ b/drivers/ssb/embedded.c @@ -29,6 +29,7 @@ int ssb_watchdog_timer_set(struct ssb_bus *bus, u32 ticks) } return -ENODEV; } +EXPORT_SYMBOL(ssb_watchdog_timer_set); u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask) { diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index d0b093b66adc..5e38ba10a3a9 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -961,7 +961,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) switch (trip_type) { case THERMAL_TRIP_CRITICAL: - if (temp > trip_temp) { + if (temp >= trip_temp) { if (tz->ops->notify) ret = tz->ops->notify(tz, count, trip_type); @@ -974,7 +974,7 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) } break; case THERMAL_TRIP_HOT: - if (temp > trip_temp) + if (temp >= trip_temp) if (tz->ops->notify) tz->ops->notify(tz, count, trip_type); break; @@ -986,14 +986,14 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) cdev = instance->cdev; - if (temp > trip_temp) + if (temp >= trip_temp) cdev->ops->set_cur_state(cdev, 1); else cdev->ops->set_cur_state(cdev, 0); } break; case THERMAL_TRIP_PASSIVE: - if (temp > trip_temp || tz->passive) + if (temp >= trip_temp || tz->passive) thermal_zone_device_passive(tz, temp, trip_temp, count); break; diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 0716cdb44cd8..0a3dc5ece634 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_USB_MON) += mon/ obj-$(CONFIG_PCI) += host/ obj-$(CONFIG_USB_EHCI_HCD) += host/ obj-$(CONFIG_USB_ISP116X_HCD) += host/ -obj-$(CONFIG_USB_ISP1760_HCD) += host/ obj-$(CONFIG_USB_OHCI_HCD) += host/ obj-$(CONFIG_USB_UHCI_HCD) += host/ obj-$(CONFIG_USB_FHCI_HCD) += host/ diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0a69c0977e3f..7a1164dd1d37 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1375,6 +1375,9 @@ static struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ + .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ + }, { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */ diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index 563d57275448..05c913cc3658 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -794,7 +794,8 @@ usba_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags) if (ep->desc) { list_add_tail(&req->queue, &ep->queue); - if (ep->is_in || (ep_is_control(ep) + if ((!ep_is_control(ep) && ep->is_in) || + (ep_is_control(ep) && (ep->state == DATA_STAGE_IN || ep->state == STATUS_STAGE_IN))) usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY); @@ -1940,7 +1941,7 @@ static int __init usba_udc_probe(struct platform_device *pdev) usba_writel(udc, CTRL, USBA_DISABLE_MASK); clk_disable(pclk); - usba_ep = kmalloc(sizeof(struct usba_ep) * pdata->num_ep, + usba_ep = kzalloc(sizeof(struct usba_ep) * pdata->num_ep, GFP_KERNEL); if (!usba_ep) goto err_alloc_ep; diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c index cd07ea3f0c63..15438469f21a 100644 --- a/drivers/usb/host/isp1760-hcd.c +++ b/drivers/usb/host/isp1760-hcd.c @@ -1658,6 +1658,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, u32 reg_base, or_reg, skip_reg; unsigned long flags; struct ptd ptd; + packet_enqueue *pe; switch (usb_pipetype(urb->pipe)) { case PIPE_ISOCHRONOUS: @@ -1669,6 +1670,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, reg_base = INT_REGS_OFFSET; or_reg = HC_INT_IRQ_MASK_OR_REG; skip_reg = HC_INT_PTD_SKIPMAP_REG; + pe = enqueue_an_INT_packet; break; default: @@ -1676,6 +1678,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, reg_base = ATL_REGS_OFFSET; or_reg = HC_ATL_IRQ_MASK_OR_REG; skip_reg = HC_ATL_PTD_SKIPMAP_REG; + pe = enqueue_an_ATL_packet; break; } @@ -1687,6 +1690,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, u32 skip_map; u32 or_map; struct isp1760_qtd *qtd; + struct isp1760_qh *qh = ints->qh; skip_map = isp1760_readl(hcd->regs + skip_reg); skip_map |= 1 << i; @@ -1699,8 +1703,7 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, priv_write_copy(priv, (u32 *)&ptd, hcd->regs + reg_base + i * sizeof(ptd), sizeof(ptd)); qtd = ints->qtd; - - clean_up_qtdlist(qtd); + qtd = clean_up_qtdlist(qtd); free_mem(priv, ints->payload); @@ -1711,7 +1714,24 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, ints->payload = 0; isp1760_urb_done(priv, urb, status); + if (qtd) + pe(hcd, qh, qtd); break; + + } else if (ints->qtd) { + struct isp1760_qtd *qtd, *prev_qtd = ints->qtd; + + for (qtd = ints->qtd->hw_next; qtd; qtd = qtd->hw_next) { + if (qtd->urb == urb) { + prev_qtd->hw_next = clean_up_qtdlist(qtd); + isp1760_urb_done(priv, urb, status); + break; + } + prev_qtd = qtd; + } + /* we found the urb before the end of the list */ + if (qtd) + break; } ints++; } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 0ab8474b00cb..d9fcdaedf389 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1487,14 +1487,7 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port) remove_sysfs_attrs(port); - /* all open ports are closed at this point - * (by usbserial.c:__serial_close, which calls ftdi_close) - */ - - if (priv) { - usb_set_serial_port_data(port, NULL); - kref_put(&priv->kref, ftdi_sio_priv_release); - } + kref_put(&priv->kref, ftdi_sio_priv_release); return 0; } diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 0a566eea49c0..f331e2bde88a 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -974,6 +974,7 @@ int usb_serial_probe(struct usb_interface *interface, if (retval > 0) { /* quietly accept this device, but don't bind to a serial port as it's about to disappear */ + serial->num_ports = 0; goto exit; } } diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 61050ab14128..d1f80bac54f0 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -437,7 +437,7 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } -static int clcdfb_probe(struct amba_device *dev, void *id) +static int clcdfb_probe(struct amba_device *dev, struct amba_id *id) { struct clcd_board *board = dev->dev.platform_data; struct clcd_fb *fb; diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 9a577a800db5..2fb63f6ea2f1 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -29,14 +29,8 @@ /* configurable parameters */ #define ATMEL_LCDC_CVAL_DEFAULT 0xc8 -#define ATMEL_LCDC_DMA_BURST_LEN 8 - -#if defined(CONFIG_ARCH_AT91SAM9263) || defined(CONFIG_ARCH_AT91CAP9) || \ - defined(CONFIG_ARCH_AT91SAM9RL) -#define ATMEL_LCDC_FIFO_SIZE 2048 -#else -#define ATMEL_LCDC_FIFO_SIZE 512 -#endif +#define ATMEL_LCDC_DMA_BURST_LEN 8 /* words */ +#define ATMEL_LCDC_FIFO_SIZE 512 /* words */ #if defined(CONFIG_ARCH_AT91) #define ATMEL_LCDFB_FBINFO_DEFAULT (FBINFO_DEFAULT \ diff --git a/drivers/video/omap/dispc.c b/drivers/video/omap/dispc.c index dfb72f5e4c96..148cbcc39602 100644 --- a/drivers/video/omap/dispc.c +++ b/drivers/video/omap/dispc.c @@ -880,20 +880,22 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *dev) static int get_dss_clocks(void) { - if (IS_ERR((dispc.dss_ick = clk_get(dispc.fbdev->dev, "dss_ick")))) { - dev_err(dispc.fbdev->dev, "can't get dss_ick\n"); + dispc.dss_ick = clk_get(dispc.fbdev->dev, "ick"); + if (IS_ERR(dispc.dss_ick)) { + dev_err(dispc.fbdev->dev, "can't get ick\n"); return PTR_ERR(dispc.dss_ick); } - if (IS_ERR((dispc.dss1_fck = clk_get(dispc.fbdev->dev, "dss1_fck")))) { + dispc.dss1_fck = clk_get(dispc.fbdev->dev, "dss1_fck"); + if (IS_ERR(dispc.dss1_fck)) { dev_err(dispc.fbdev->dev, "can't get dss1_fck\n"); clk_put(dispc.dss_ick); return PTR_ERR(dispc.dss1_fck); } - if (IS_ERR((dispc.dss_54m_fck = - clk_get(dispc.fbdev->dev, "dss_54m_fck")))) { - dev_err(dispc.fbdev->dev, "can't get dss_54m_fck\n"); + dispc.dss_54m_fck = clk_get(dispc.fbdev->dev, "tv_fck"); + if (IS_ERR(dispc.dss_54m_fck)) { + dev_err(dispc.fbdev->dev, "can't get tv_fck\n"); clk_put(dispc.dss_ick); clk_put(dispc.dss1_fck); return PTR_ERR(dispc.dss_54m_fck); diff --git a/drivers/video/omap/rfbi.c b/drivers/video/omap/rfbi.c index a13c8dcad2a8..9332d6ca6456 100644 --- a/drivers/video/omap/rfbi.c +++ b/drivers/video/omap/rfbi.c @@ -83,12 +83,14 @@ static inline u32 rfbi_read_reg(int idx) static int rfbi_get_clocks(void) { - if (IS_ERR((rfbi.dss_ick = clk_get(rfbi.fbdev->dev, "dss_ick")))) { - dev_err(rfbi.fbdev->dev, "can't get dss_ick\n"); + rfbi.dss_ick = clk_get(rfbi.fbdev->dev, "ick"); + if (IS_ERR(rfbi.dss_ick)) { + dev_err(rfbi.fbdev->dev, "can't get ick\n"); return PTR_ERR(rfbi.dss_ick); } - if (IS_ERR((rfbi.dss1_fck = clk_get(rfbi.fbdev->dev, "dss1_fck")))) { + rfbi.dss1_fck = clk_get(rfbi.fbdev->dev, "dss1_fck"); + if (IS_ERR(rfbi.dss1_fck)) { dev_err(rfbi.fbdev->dev, "can't get dss1_fck\n"); clk_put(rfbi.dss_ick); return PTR_ERR(rfbi.dss1_fck); diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 5e9c6302433b..d3a568e6b169 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -947,7 +947,8 @@ static int __devexit s3c_fb_remove(struct platform_device *pdev) int win; for (win = 0; win <= S3C_FB_MAX_WIN; win++) - s3c_fb_release_win(sfb, sfb->windows[win]); + if (sfb->windows[win]) + s3c_fb_release_win(sfb, sfb->windows[win]); iounmap(sfb->regs); @@ -985,11 +986,20 @@ static int s3c_fb_suspend(struct platform_device *pdev, pm_message_t state) static int s3c_fb_resume(struct platform_device *pdev) { struct s3c_fb *sfb = platform_get_drvdata(pdev); + struct s3c_fb_platdata *pd = sfb->pdata; struct s3c_fb_win *win; int win_no; clk_enable(sfb->bus_clk); + /* setup registers */ + writel(pd->vidcon1, sfb->regs + VIDCON1); + + /* zero all windows before we do anything */ + for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) + s3c_fb_clear_win(sfb, win_no); + + /* restore framebuffers */ for (win_no = 0; win_no < S3C_FB_MAX_WIN; win_no++) { win = sfb->windows[win_no]; if (!win) diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 92ea0ab44ce2..f10d2fbeda06 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -47,6 +47,7 @@ struct sh_mobile_lcdc_priv { #endif unsigned long lddckr; struct sh_mobile_lcdc_chan ch[2]; + int started; }; /* shared registers */ @@ -451,6 +452,7 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv) /* start the lcdc */ sh_mobile_lcdc_start_stop(priv, 1); + priv->started = 1; /* tell the board code to enable the panel */ for (k = 0; k < ARRAY_SIZE(priv->ch); k++) { @@ -493,7 +495,10 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv) } /* stop the lcdc */ - sh_mobile_lcdc_start_stop(priv, 0); + if (priv->started) { + sh_mobile_lcdc_start_stop(priv, 0); + priv->started = 0; + } /* stop clocks */ for (k = 0; k < ARRAY_SIZE(priv->ch); k++) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8ac9cddac575..cab100acf983 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES secure, but slightly less efficient. If in doubt, say yes. +config XEN_DEV_EVTCHN + tristate "Xen /dev/xen/evtchn device" + depends on XEN + default y + help + The evtchn driver allows a userspace process to triger event + channels and to receive notification of an event channel + firing. + If in doubt, say yes. + config XENFS tristate "Xen filesystem" depends on XEN @@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS a xen platform. If in doubt, say yes. +config XEN_SYS_HYPERVISOR + bool "Create xen entries under /sys/hypervisor" + depends on XEN && SYSFS + select SYS_HYPERVISOR + default y + help + Create entries under /sys/hypervisor describing the Xen + hypervisor environment. When running native or in another + virtual environment, /sys/hypervisor will still be present, + but will have no xen contents.
\ No newline at end of file diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc9e103..ec2a39b1e26f 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -4,4 +4,6 @@ obj-y += xenbus/ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XENFS) += xenfs/
\ No newline at end of file +obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o +obj-$(CONFIG_XENFS) += xenfs/ +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
\ No newline at end of file diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af5dba0..891d2e90753a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq) return info_for_irq(irq)->evtchn; } +unsigned irq_from_evtchn(unsigned int evtchn) +{ + return evtchn_to_irq[evtchn]; +} +EXPORT_SYMBOL_GPL(irq_from_evtchn); + static enum ipi_vector ipi_from_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); @@ -335,7 +341,7 @@ static int find_unbound_irq(void) if (irq == nr_irqs) panic("No available IRQ to bind to: increase nr_irqs!\n"); - desc = irq_to_desc_alloc_cpu(irq, 0); + desc = irq_to_desc_alloc_node(irq, 0); if (WARN_ON(desc == NULL)) return -1; @@ -688,13 +694,13 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); if (!VALID_EVTCHN(evtchn)) - return; + return -1; /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; @@ -707,13 +713,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); -} + return 0; +} -static void set_affinity_irq(unsigned irq, const struct cpumask *dest) +static int set_affinity_irq(unsigned irq, const struct cpumask *dest) { unsigned tcpu = cpumask_first(dest); - rebind_irq_to_cpu(irq, tcpu); + + return rebind_irq_to_cpu(irq, tcpu); } int resend_irq_on_evtchn(unsigned int irq) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c new file mode 100644 index 000000000000..af031950f9b1 --- /dev/null +++ b/drivers/xen/evtchn.c @@ -0,0 +1,507 @@ +/****************************************************************************** + * evtchn.c + * + * Driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004-2005, K A Fraser + * Multi-process extensions Copyright (c) 2004, Steven Smith + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/miscdevice.h> +#include <linux/major.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/poll.h> +#include <linux/irq.h> +#include <linux/init.h> +#include <linux/gfp.h> +#include <linux/mutex.h> +#include <linux/cpu.h> +#include <xen/events.h> +#include <xen/evtchn.h> +#include <asm/xen/hypervisor.h> + +struct per_user_data { + struct mutex bind_mutex; /* serialize bind/unbind operations */ + + /* Notification ring, accessed via /dev/xen/evtchn. */ +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + evtchn_port_t *ring; + unsigned int ring_cons, ring_prod, ring_overflow; + struct mutex ring_cons_mutex; /* protect against concurrent readers */ + + /* Processes wait on this queue when ring is empty. */ + wait_queue_head_t evtchn_wait; + struct fasync_struct *evtchn_async_queue; + const char *name; +}; + +/* Who's bound to each port? */ +static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ + +irqreturn_t evtchn_interrupt(int irq, void *data) +{ + unsigned int port = (unsigned long)data; + struct per_user_data *u; + + spin_lock(&port_user_lock); + + u = port_user[port]; + + disable_irq_nosync(irq); + + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + wmb(); /* Ensure ring contents visible */ + if (u->ring_cons == u->ring_prod++) { + wake_up_interruptible(&u->evtchn_wait); + kill_fasync(&u->evtchn_async_queue, + SIGIO, POLL_IN); + } + } else { + u->ring_overflow = 1; + } + + spin_unlock(&port_user_lock); + + return IRQ_HANDLED; +} + +static ssize_t evtchn_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int rc; + unsigned int c, p, bytes1 = 0, bytes2 = 0; + struct per_user_data *u = file->private_data; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + if (count == 0) + return 0; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + for (;;) { + mutex_lock(&u->ring_cons_mutex); + + rc = -EFBIG; + if (u->ring_overflow) + goto unlock_out; + + c = u->ring_cons; + p = u->ring_prod; + if (c != p) + break; + + mutex_unlock(&u->ring_cons_mutex); + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(u->evtchn_wait, + u->ring_cons != u->ring_prod); + if (rc) + return rc; + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + sizeof(evtchn_port_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + } else { + bytes1 = (p - c) * sizeof(evtchn_port_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if (bytes1 > count) { + bytes1 = count; + bytes2 = 0; + } else if ((bytes1 + bytes2) > count) { + bytes2 = count - bytes1; + } + + rc = -EFAULT; + rmb(); /* Ensure that we see the port before we copy it. */ + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + ((bytes2 != 0) && + copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) + goto unlock_out; + + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); + rc = bytes1 + bytes2; + + unlock_out: + mutex_unlock(&u->ring_cons_mutex); + return rc; +} + +static ssize_t evtchn_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int rc, i; + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + struct per_user_data *u = file->private_data; + + if (kbuf == NULL) + return -ENOMEM; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + rc = 0; + if (count == 0) + goto out; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + rc = -EFAULT; + if (copy_from_user(kbuf, buf, count) != 0) + goto out; + + spin_lock_irq(&port_user_lock); + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) + enable_irq(irq_from_evtchn(kbuf[i])); + spin_unlock_irq(&port_user_lock); + + rc = count; + + out: + free_page((unsigned long)kbuf); + return rc; +} + +static int evtchn_bind_to_user(struct per_user_data *u, int port) +{ + int rc = 0; + + /* + * Ports are never reused, so every caller should pass in a + * unique port. + * + * (Locking not necessary because we haven't registered the + * interrupt handler yet, and our caller has already + * serialized bind operations.) + */ + BUG_ON(port_user[port] != NULL); + port_user[port] = u; + + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, + u->name, (void *)(unsigned long)port); + if (rc >= 0) + rc = 0; + + return rc; +} + +static void evtchn_unbind_from_user(struct per_user_data *u, int port) +{ + int irq = irq_from_evtchn(port); + + unbind_from_irqhandler(irq, (void *)(unsigned long)port); + + /* make sure we unbind the irq handler before clearing the port */ + barrier(); + + port_user[port] = NULL; +} + +static long evtchn_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + int rc; + struct per_user_data *u = file->private_data; + void __user *uarg = (void __user *) arg; + + /* Prevent bind from racing with unbind */ + mutex_lock(&u->bind_mutex); + + switch (cmd) { + case IOCTL_EVTCHN_BIND_VIRQ: { + struct ioctl_evtchn_bind_virq bind; + struct evtchn_bind_virq bind_virq; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_virq.virq = bind.virq; + bind_virq.vcpu = 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_virq.port); + if (rc == 0) + rc = bind_virq.port; + break; + } + + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { + struct ioctl_evtchn_bind_interdomain bind; + struct evtchn_bind_interdomain bind_interdomain; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_interdomain.remote_dom = bind.remote_domain; + bind_interdomain.remote_port = bind.remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_interdomain.local_port); + if (rc == 0) + rc = bind_interdomain.local_port; + break; + } + + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { + struct ioctl_evtchn_bind_unbound_port bind; + struct evtchn_alloc_unbound alloc_unbound; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = bind.remote_domain; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, alloc_unbound.port); + if (rc == 0) + rc = alloc_unbound.port; + break; + } + + case IOCTL_EVTCHN_UNBIND: { + struct ioctl_evtchn_unbind unbind; + + rc = -EFAULT; + if (copy_from_user(&unbind, uarg, sizeof(unbind))) + break; + + rc = -EINVAL; + if (unbind.port >= NR_EVENT_CHANNELS) + break; + + spin_lock_irq(&port_user_lock); + + rc = -ENOTCONN; + if (port_user[unbind.port] != u) { + spin_unlock_irq(&port_user_lock); + break; + } + + evtchn_unbind_from_user(u, unbind.port); + + spin_unlock_irq(&port_user_lock); + + rc = 0; + break; + } + + case IOCTL_EVTCHN_NOTIFY: { + struct ioctl_evtchn_notify notify; + + rc = -EFAULT; + if (copy_from_user(¬ify, uarg, sizeof(notify))) + break; + + if (notify.port >= NR_EVENT_CHANNELS) { + rc = -EINVAL; + } else if (port_user[notify.port] != u) { + rc = -ENOTCONN; + } else { + notify_remote_via_evtchn(notify.port); + rc = 0; + } + break; + } + + case IOCTL_EVTCHN_RESET: { + /* Initialise the ring to empty. Clear errors. */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&port_user_lock); + u->ring_cons = u->ring_prod = u->ring_overflow = 0; + spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->ring_cons_mutex); + rc = 0; + break; + } + + default: + rc = -ENOSYS; + break; + } + mutex_unlock(&u->bind_mutex); + + return rc; +} + +static unsigned int evtchn_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = POLLOUT | POLLWRNORM; + struct per_user_data *u = file->private_data; + + poll_wait(file, &u->evtchn_wait, wait); + if (u->ring_cons != u->ring_prod) + mask |= POLLIN | POLLRDNORM; + if (u->ring_overflow) + mask = POLLERR; + return mask; +} + +static int evtchn_fasync(int fd, struct file *filp, int on) +{ + struct per_user_data *u = filp->private_data; + return fasync_helper(fd, filp, on, &u->evtchn_async_queue); +} + +static int evtchn_open(struct inode *inode, struct file *filp) +{ + struct per_user_data *u; + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); + if (u->name == NULL) { + kfree(u); + return -ENOMEM; + } + + init_waitqueue_head(&u->evtchn_wait); + + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + if (u->ring == NULL) { + kfree(u->name); + kfree(u); + return -ENOMEM; + } + + mutex_init(&u->bind_mutex); + mutex_init(&u->ring_cons_mutex); + + filp->private_data = u; + + return 0; +} + +static int evtchn_release(struct inode *inode, struct file *filp) +{ + int i; + struct per_user_data *u = filp->private_data; + + spin_lock_irq(&port_user_lock); + + free_page((unsigned long)u->ring); + + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (port_user[i] != u) + continue; + + evtchn_unbind_from_user(port_user[i], i); + } + + spin_unlock_irq(&port_user_lock); + + kfree(u->name); + kfree(u); + + return 0; +} + +static const struct file_operations evtchn_fops = { + .owner = THIS_MODULE, + .read = evtchn_read, + .write = evtchn_write, + .unlocked_ioctl = evtchn_ioctl, + .poll = evtchn_poll, + .fasync = evtchn_fasync, + .open = evtchn_open, + .release = evtchn_release, +}; + +static struct miscdevice evtchn_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "evtchn", + .fops = &evtchn_fops, +}; +static int __init evtchn_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + spin_lock_init(&port_user_lock); + memset(port_user, 0, sizeof(port_user)); + + /* Create '/dev/misc/evtchn'. */ + err = misc_register(&evtchn_miscdev); + if (err != 0) { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + printk(KERN_INFO "Event-channel device installed.\n"); + + return 0; +} + +static void __exit evtchn_cleanup(void) +{ + misc_deregister(&evtchn_miscdev); +} + +module_init(evtchn_init); +module_exit(evtchn_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 4b5b84837ee1..fddc2025dece 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -98,9 +98,8 @@ static void do_suspend(void) goto out; } - printk("suspending xenbus...\n"); - /* XXX use normal device tree? */ - xenbus_suspend(); + printk(KERN_DEBUG "suspending xenstore...\n"); + xs_suspend(); err = device_power_down(PMSG_SUSPEND); if (err) { @@ -116,9 +115,9 @@ static void do_suspend(void) if (!cancelled) { xen_arch_resume(); - xenbus_resume(); + xs_resume(); } else - xenbus_suspend_cancel(); + xs_suspend_cancel(); device_power_up(PMSG_RESUME); diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c new file mode 100644 index 000000000000..88a60e03ccf0 --- /dev/null +++ b/drivers/xen/sys-hypervisor.c @@ -0,0 +1,445 @@ +/* + * copyright (c) 2006 IBM Corporation + * Authored by: Mike D. Day <ncmike@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kobject.h> + +#include <asm/xen/hypervisor.h> +#include <asm/xen/hypercall.h> + +#include <xen/xenbus.h> +#include <xen/interface/xen.h> +#include <xen/interface/version.h> + +#define HYPERVISOR_ATTR_RO(_name) \ +static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) + +#define HYPERVISOR_ATTR_RW(_name) \ +static struct hyp_sysfs_attr _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +struct hyp_sysfs_attr { + struct attribute attr; + ssize_t (*show)(struct hyp_sysfs_attr *, char *); + ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); + void *hyp_attr_data; +}; + +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return sprintf(buffer, "xen\n"); +} + +HYPERVISOR_ATTR_RO(type); + +static int __init xen_sysfs_type_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &type_attr.attr); +} + +static void xen_sysfs_type_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &type_attr.attr); +} + +/* xen version attributes */ +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version >> 16); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(major); + +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version & 0xff); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(minor); + +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *extra; + + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); + if (extra) { + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); + if (!ret) + ret = sprintf(buffer, "%s\n", extra); + kfree(extra); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(extra); + +static struct attribute *version_attrs[] = { + &major_attr.attr, + &minor_attr.attr, + &extra_attr.attr, + NULL +}; + +static struct attribute_group version_group = { + .name = "version", + .attrs = version_attrs, +}; + +static int __init xen_sysfs_version_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &version_group); +} + +static void xen_sysfs_version_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &version_group); +} + +/* UUID */ + +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + char *vm, *val; + int ret; + extern int xenstored_ready; + + if (!xenstored_ready) + return -EBUSY; + + vm = xenbus_read(XBT_NIL, "vm", "", NULL); + if (IS_ERR(vm)) + return PTR_ERR(vm); + val = xenbus_read(XBT_NIL, vm, "uuid", NULL); + kfree(vm); + if (IS_ERR(val)) + return PTR_ERR(val); + ret = sprintf(buffer, "%s\n", val); + kfree(val); + return ret; +} + +HYPERVISOR_ATTR_RO(uuid); + +static int __init xen_sysfs_uuid_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); +} + +static void xen_sysfs_uuid_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); +} + +/* xen compilation attributes */ + +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compiler); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiler); + +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_by); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiled_by); + +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_date); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compile_date); + +static struct attribute *xen_compile_attrs[] = { + &compiler_attr.attr, + &compiled_by_attr.attr, + &compile_date_attr.attr, + NULL +}; + +static struct attribute_group xen_compilation_group = { + .name = "compilation", + .attrs = xen_compile_attrs, +}; + +int __init static xen_compilation_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); +} + +static void xen_compilation_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); +} + +/* xen properties info */ + +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *caps; + + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); + if (caps) { + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); + if (!ret) + ret = sprintf(buffer, "%s\n", caps); + kfree(caps); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(capabilities); + +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *cset; + + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); + if (cset) { + ret = HYPERVISOR_xen_version(XENVER_changeset, cset); + if (!ret) + ret = sprintf(buffer, "%s\n", cset); + kfree(cset); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(changeset); + +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_platform_parameters *parms; + + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); + if (parms) { + ret = HYPERVISOR_xen_version(XENVER_platform_parameters, + parms); + if (!ret) + ret = sprintf(buffer, "%lx\n", parms->virt_start); + kfree(parms); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(virtual_start); + +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret; + + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); + if (ret > 0) + ret = sprintf(buffer, "%x\n", ret); + + return ret; +} + +HYPERVISOR_ATTR_RO(pagesize); + +static ssize_t xen_feature_show(int index, char *buffer) +{ + ssize_t ret; + struct xen_feature_info info; + + info.submap_idx = index; + ret = HYPERVISOR_xen_version(XENVER_get_features, &info); + if (!ret) + ret = sprintf(buffer, "%08x", info.submap); + + return ret; +} + +static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + ssize_t len; + int i; + + len = 0; + for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) { + int ret = xen_feature_show(i, buffer + len); + if (ret < 0) { + if (len == 0) + len = ret; + break; + } + len += ret; + } + if (len > 0) + buffer[len++] = '\n'; + + return len; +} + +HYPERVISOR_ATTR_RO(features); + +static struct attribute *xen_properties_attrs[] = { + &capabilities_attr.attr, + &changeset_attr.attr, + &virtual_start_attr.attr, + &pagesize_attr.attr, + &features_attr.attr, + NULL +}; + +static struct attribute_group xen_properties_group = { + .name = "properties", + .attrs = xen_properties_attrs, +}; + +static int __init xen_properties_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_properties_group); +} + +static void xen_properties_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_properties_group); +} + +static int __init hyper_sysfs_init(void) +{ + int ret; + + if (!xen_domain()) + return -ENODEV; + + ret = xen_sysfs_type_init(); + if (ret) + goto out; + ret = xen_sysfs_version_init(); + if (ret) + goto version_out; + ret = xen_compilation_init(); + if (ret) + goto comp_out; + ret = xen_sysfs_uuid_init(); + if (ret) + goto uuid_out; + ret = xen_properties_init(); + if (ret) + goto prop_out; + + goto out; + +prop_out: + xen_sysfs_uuid_destroy(); +uuid_out: + xen_compilation_destroy(); +comp_out: + xen_sysfs_version_destroy(); +version_out: + xen_sysfs_type_destroy(); +out: + return ret; +} + +static void __exit hyper_sysfs_exit(void) +{ + xen_properties_destroy(); + xen_compilation_destroy(); + xen_sysfs_uuid_destroy(); + xen_sysfs_version_destroy(); + xen_sysfs_type_destroy(); + +} +module_init(hyper_sysfs_init); +module_exit(hyper_sysfs_exit); + +static ssize_t hyp_sysfs_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->show) + return hyp_attr->show(hyp_attr, buffer); + return 0; +} + +static ssize_t hyp_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t len) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->store) + return hyp_attr->store(hyp_attr, buffer, len); + return 0; +} + +static struct sysfs_ops hyp_sysfs_ops = { + .show = hyp_sysfs_show, + .store = hyp_sysfs_store, +}; + +static struct kobj_type hyp_sysfs_kobj_type = { + .sysfs_ops = &hyp_sysfs_ops, +}; + +static int __init hypervisor_subsys_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; + return 0; +} +device_initcall(hypervisor_subsys_init); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 773d1cf23283..d42e25d5968d 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name); static void xenbus_dev_shutdown(struct device *_dev); +static int xenbus_dev_suspend(struct device *dev, pm_message_t state); +static int xenbus_dev_resume(struct device *dev); + /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = { .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, .dev_attrs = xenbus_dev_attrs, + + .suspend = xenbus_dev_suspend, + .resume = xenbus_dev_resume, }, }; @@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) kfree(root); } +EXPORT_SYMBOL_GPL(xenbus_dev_changed); static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = { .callback = frontend_changed, }; -static int suspend_dev(struct device *dev, void *data) +static int xenbus_dev_suspend(struct device *dev, pm_message_t state) { int err = 0; struct xenbus_driver *drv; @@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data) drv = to_xenbus_driver(dev->driver); xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) - err = drv->suspend(xdev); + err = drv->suspend(xdev, state); if (err) printk(KERN_WARNING "xenbus: suspend %s failed: %i\n", dev_name(dev), err); return 0; } -static int suspend_cancel_dev(struct device *dev, void *data) -{ - int err = 0; - struct xenbus_driver *drv; - struct xenbus_device *xdev; - - DPRINTK(""); - - if (dev->driver == NULL) - return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - if (drv->suspend_cancel) - err = drv->suspend_cancel(xdev); - if (err) - printk(KERN_WARNING - "xenbus: suspend_cancel %s failed: %i\n", - dev_name(dev), err); - return 0; -} - -static int resume_dev(struct device *dev, void *data) +static int xenbus_dev_resume(struct device *dev) { int err; struct xenbus_driver *drv; @@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data) return 0; } -void xenbus_suspend(void) -{ - DPRINTK(""); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); - xenbus_backend_suspend(suspend_dev); - xs_suspend(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend); - -void xenbus_resume(void) -{ - xb_init_comms(); - xs_resume(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); - xenbus_backend_resume(resume_dev); -} -EXPORT_SYMBOL_GPL(xenbus_resume); - -void xenbus_suspend_cancel(void) -{ - xs_suspend_cancel(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); - xenbus_backend_resume(suspend_cancel_dev); -} -EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); - /* A flag to determine if xenstored is 'ready' (i.e. has started) */ int xenstored_ready = 0; diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e325eab4724d..eab33f1dbdf7 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -673,6 +673,8 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; + xb_init_comms(); + mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); up_write(&xs_state.transaction_mutex); diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 515741a8e6b8..6559e0c752ce 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -20,10 +20,27 @@ MODULE_DESCRIPTION("Xen filesystem"); MODULE_LICENSE("GPL"); +static ssize_t capabilities_read(struct file *file, char __user *buf, + size_t size, loff_t *off) +{ + char *tmp = ""; + + if (xen_initial_domain()) + tmp = "control_d\n"; + + return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp)); +} + +static const struct file_operations capabilities_file_ops = { + .read = capabilities_read, +}; + static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr xenfs_files[] = { - [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, + [1] = {}, + { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, + { "capabilities", &capabilities_file_ops, S_IRUGO }, {""}, }; diff --git a/firmware/cis/.gitignore b/firmware/cis/.gitignore new file mode 100644 index 000000000000..1de39847f261 --- /dev/null +++ b/firmware/cis/.gitignore @@ -0,0 +1 @@ +*.cis diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index eeb246845909..2341375386f8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -297,20 +297,14 @@ static int validate_request(struct autofs_wait_queue **wait, */ if (notify == NFY_MOUNT) { /* - * If the dentry isn't hashed just go ahead and try the - * mount again with a new wait (not much else we can do). - */ - if (!d_unhashed(dentry)) { - /* - * But if the dentry is hashed, that means that we - * got here through the revalidate path. Thus, we - * need to check if the dentry has been mounted - * while we waited on the wq_mutex. If it has, - * simply return success. - */ - if (d_mountpoint(dentry)) - return 0; - } + * If the dentry was successfully mounted while we slept + * on the wait queue mutex we can return success. If it + * isn't mounted (doesn't have submounts for the case of + * a multi-mount with no mount at it's base) we can + * continue on and create a new request. + */ + if (have_submounts(dentry)) + return 0; } return 1; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 5cebf0b37798..697f6b5f1313 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -41,6 +41,7 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <asm/cacheflush.h> +#include <asm/page.h> /****************************************************************************/ @@ -54,6 +55,18 @@ #define DBG_FLT(a...) #endif +/* + * User data (stack, data section and bss) needs to be aligned + * for the same reasons as SLAB memory is, and to the same amount. + * Avoid duplicating architecture specific code by using the same + * macro as with SLAB allocation: + */ +#ifdef ARCH_SLAB_MINALIGN +#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN) +#else +#define FLAT_DATA_ALIGN (sizeof(void *)) +#endif + #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ @@ -114,20 +127,18 @@ static unsigned long create_flat_tables( int envc = bprm->envc; char uninitialized_var(dummy); - sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p); + sp = (unsigned long *)p; + sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN); + argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + envp = argv + (argc + 1); - sp -= envc+1; - envp = sp; - sp -= argc+1; - argv = sp; - - flat_stack_align(sp); if (flat_argvp_envp_on_stack()) { - --sp; put_user((unsigned long) envp, sp); - --sp; put_user((unsigned long) argv, sp); + put_user((unsigned long) envp, sp + 2); + put_user((unsigned long) argv, sp + 1); } - put_user(argc,--sp); + put_user(argc, sp); current->mm->arg_start = (unsigned long) p; while (argc-->0) { put_user((unsigned long) p, argv++); @@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm, ret = realdatastart; goto err; } - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", (int)(data_len + bss_len + stack_len), (int)datapos); @@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm, } realdatastart = textpos + ntohl(hdr->data_start); - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); - reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + - MAX_SHARED_LIBS * sizeof(unsigned long)); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); + + reloc = (unsigned long *) + (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = textpos; memp_size = len; #ifdef CONFIG_BINFMT_ZFLAT @@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ - + stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); if (res > (unsigned long)-4096) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e2c7c738f23..35af93355063 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, search_start); if (block_group && block_group_bits(block_group, data)) { down_read(&space_info->groups_sem); - goto have_block_group; + if (list_empty(&block_group->list) || + block_group->ro) { + /* + * someone is removing this block group, + * we can't jump into the have_block_group + * target because our list pointers are not + * valid + */ + btrfs_put_block_group(block_group); + up_read(&space_info->groups_sem); + } else + goto have_block_group; } else if (block_group) { btrfs_put_block_group(block_group); } @@ -2656,6 +2667,13 @@ have_block_group: * people trying to start a new cluster */ spin_lock(&last_ptr->refill_lock); + if (last_ptr->block_group && + (last_ptr->block_group->ro || + !block_group_bits(last_ptr->block_group, data))) { + offset = 0; + goto refill_cluster; + } + offset = btrfs_alloc_from_cluster(block_group, last_ptr, num_bytes, search_start); if (offset) { @@ -2681,10 +2699,17 @@ have_block_group: last_ptr_loop = 1; search_start = block_group->key.objectid; + /* + * we know this block group is properly + * in the list because + * btrfs_remove_block_group, drops the + * cluster before it removes the block + * group from the list + */ goto have_block_group; } spin_unlock(&last_ptr->lock); - +refill_cluster: /* * this cluster didn't work out, free it and * start over @@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, { struct btrfs_path *path; struct btrfs_block_group_cache *block_group; + struct btrfs_free_cluster *cluster; struct btrfs_key key; int ret; @@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); + /* make sure this block group isn't part of an allocation cluster */ + cluster = &root->fs_info->data_alloc_cluster; + spin_lock(&cluster->refill_lock); + btrfs_return_cluster_to_free_space(block_group, cluster); + spin_unlock(&cluster->refill_lock); + + /* + * make sure this block group isn't part of a metadata + * allocation cluster + */ + cluster = &root->fs_info->meta_alloc_cluster; + spin_lock(&cluster->refill_lock); + btrfs_return_cluster_to_free_space(block_group, cluster); + spin_unlock(&cluster->refill_lock); + path = btrfs_alloc_path(); BUG_ON(!path); @@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->block_group_cache_lock); btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); - list_del(&block_group->list); + /* + * we must use list_del_init so people can check to see if they + * are still on the list after taking the semaphore + */ + list_del_init(&block_group->list); up_write(&block_group->space_info->groups_sem); spin_lock(&block_group->space_info->lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5f01dad4b696..a6d35b0054ca 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1440,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->io_align = root->sectorsize; device->sector_size = root->sectorsize; device->total_bytes = i_size_read(bdev->bd_inode); + device->disk_total_bytes = device->total_bytes; device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; diff --git a/fs/buffer.c b/fs/buffer.c index aed297739eb0..49106127a4aa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2736,6 +2736,8 @@ has_buffers: pos += blocksize; } + map_bh.b_size = blocksize; + map_bh.b_state = 0; err = get_block(inode, iblock, &map_bh, 0); if (err) goto unlock; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 19218e1463d6..f7c255f9c624 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * cf-bind.c + * bind.c */ extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); /* - * cf-daemon.c + * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; @@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr); /* - * cf-interface.c + * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; /* - * cf-key.c + * key.c */ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); /* - * cf-namei.c + * namei.c */ extern int cachefiles_delete_object(struct cachefiles_cache *cache, struct cachefiles_object *object); @@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); /* - * cf-proc.c + * proc.c */ #ifdef CONFIG_CACHEFILES_HISTOGRAM extern atomic_t cachefiles_lookup_histogram[HZ]; @@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) #endif /* - * cf-rdwr.c + * rdwr.c */ extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, struct page *, gfp_t); @@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *); extern void cachefiles_uncache_page(struct fscache_object *, struct page *); /* - * cf-security.c + * security.c */ extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, @@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* - * cf-xattr.c + * xattr.c */ extern int cachefiles_check_object_type(struct cachefiles_object *object); extern int cachefiles_set_object_xattr(struct cachefiles_object *object, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5759ba53dc96..d06260251c30 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2475,7 +2475,7 @@ querySymLinkRetry: /* BB FIXME investigate remapping reserved chars here */ *symlinkinfo = cifs_strndup_from_ucs(data_start, count, is_unicode, nls_codepage); - if (!symlinkinfo) + if (!*symlinkinfo) rc = -ENOMEM; } } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11431ed72a7f..3758965d73d5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -225,6 +225,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (!(oflags & FMODE_READ)) write_only = true; + mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, pnetfid, presp_data, &oplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -310,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - mode &= ~current_umask(); if (oplockEnabled) oplock = REQ_OPLOCK; @@ -336,7 +336,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, else /* success, no need to query */ goto cifs_create_set_dentry; } else if ((rc != -EIO) && (rc != -EREMOTE) && - (rc != -EOPNOTSUPP)) /* path not found or net err */ + (rc != -EOPNOTSUPP) && (rc != -EINVAL)) goto cifs_create_out; /* else fallthrough to retry, using older open call, this is case where server does not support this SMB level, and @@ -609,7 +609,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ int oplock = 0; - int mode; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -658,30 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); + /* Posix open is only called (at lookup time) for file create now. + * For opens (rather than creates), because we do not know if it + * is a file or directory yet, and current Samba no longer allows + * us to do posix open on dirs, we could end up wasting an open call + * on what turns out to be a dir. For file opens, we wait to call posix + * open till cifs_open. It could be added here (lookup) in the future + * but the performance tradeoff of the extra network request when EISDIR + * or EACCES is returned would have to be weighed against the 50% + * reduction in network traffic in the other paths. + */ if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN)) { - if (!((nd->intent.open.flags & O_CREAT) && - (nd->intent.open.flags & O_EXCL))) { - mode = nd->intent.open.create_mode & - ~current_umask(); - rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, mode, + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && + (nd->intent.open.flags & O_CREAT)) { + rc = cifs_posix_open(full_path, &newInode, + parent_dir_inode->i_sb, + nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); - /* - * This code works around a bug in - * samba posix open in samba versions 3.3.1 - * and earlier where create works - * but open fails with invalid parameter. - * If either of these error codes are - * returned, follow the normal lookup. - * Otherwise, the error during posix open - * is handled. - */ - if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) - posix_open = true; - } + /* + * The check below works around a bug in POSIX + * open in samba versions 3.3.1 and earlier where + * open could incorrectly fail with invalid parameter. + * If either that or op not supported returned, follow + * the normal lookup. + */ + if ((rc == 0) || (rc == -ENOENT)) + posix_open = true; + else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) + pTcon->broken_posix_open = true; } if (!posix_open) rc = cifs_get_inode_info_unix(&newInode, full_path, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 38c06f826575..302ea15f02e6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -130,10 +130,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode, struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) { - file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (file->private_data == NULL) - return -ENOMEM; - pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); write_lock(&GlobalSMBSeslock); pCifsInode = CIFS_I(file->f_path.dentry->d_inode); @@ -184,6 +180,38 @@ psx_client_can_cache: return 0; } +static struct cifsFileInfo * +cifs_fill_filedata(struct file *file) +{ + struct list_head *tmp; + struct cifsFileInfo *pCifsFile = NULL; + struct cifsInodeInfo *pCifsInode = NULL; + + /* search inode for this file and fill in file->private_data */ + pCifsInode = CIFS_I(file->f_path.dentry->d_inode); + read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &pCifsInode->openFileList) { + pCifsFile = list_entry(tmp, struct cifsFileInfo, flist); + if ((pCifsFile->pfile == NULL) && + (pCifsFile->pid == current->tgid)) { + /* mode set in cifs_create */ + + /* needed for writepage */ + pCifsFile->pfile = file; + file->private_data = pCifsFile; + break; + } + } + read_unlock(&GlobalSMBSeslock); + + if (file->private_data != NULL) { + return pCifsFile; + } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) + cERROR(1, ("could not find file instance for " + "new file %p", file)); + return NULL; +} + /* all arguments to this function must be checked for validity in caller */ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, @@ -258,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; - struct list_head *tmp; char *full_path = NULL; int desiredAccess; int disposition; @@ -270,32 +297,12 @@ int cifs_open(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); tcon = cifs_sb->tcon; - /* search inode for this file and fill in file->private_data */ pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp, struct cifsFileInfo, - flist); - if ((pCifsFile->pfile == NULL) && - (pCifsFile->pid == current->tgid)) { - /* mode set in cifs_create */ - - /* needed for writepage */ - pCifsFile->pfile = file; - - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - - if (file->private_data != NULL) { - rc = 0; + pCifsFile = cifs_fill_filedata(file); + if (pCifsFile) { FreeXid(xid); - return rc; - } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) - cERROR(1, ("could not find file instance for " - "new file %p", file)); + return 0; + } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -325,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file) /* no need for special case handling of setting mode on read only files needed here */ + pCifsFile = cifs_fill_filedata(file); cifs_posix_open_inode_helper(inode, file, pCifsInode, pCifsFile, oplock, netfid); goto out; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index ea9d11e3dcbb..cd83c53fcbb5 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -107,48 +107,48 @@ void * cifs_follow_link(struct dentry *direntry, struct nameidata *nd) { struct inode *inode = direntry->d_inode; - int rc = -EACCES; + int rc = -ENOMEM; int xid; char *full_path = NULL; - char *target_path = ERR_PTR(-ENOMEM); - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; + char *target_path = NULL; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *tcon = cifs_sb->tcon; xid = GetXid(); - full_path = build_path_from_dentry(direntry); + /* + * For now, we just handle symlinks with unix extensions enabled. + * Eventually we should handle NTFS reparse points, and MacOS + * symlink support. For instance... + * + * rc = CIFSSMBQueryReparseLinkInfo(...) + * + * For now, just return -EACCES when the server doesn't support posix + * extensions. Note that we still allow querying symlinks when posix + * extensions are manually disabled. We could disable these as well + * but there doesn't seem to be any harm in allowing the client to + * read them. + */ + if (!(tcon->ses->capabilities & CAP_UNIX)) { + rc = -EACCES; + goto out; + } + full_path = build_path_from_dentry(direntry); if (!full_path) goto out; cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - - /* We could change this to: - if (pTcon->unix_ext) - but there does not seem any point in refusing to - get symlink info if we can, even if unix extensions - turned off for this mount */ - - if (pTcon->ses->capabilities & CAP_UNIX) - rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, - &target_path, - cifs_sb->local_nls); - else { - /* BB add read reparse point symlink code here */ - /* rc = CIFSSMBQueryReparseLinkInfo */ - /* BB Add code to Query ReparsePoint info */ - /* BB Add MAC style xsymlink check here if enabled */ - } + rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, + cifs_sb->local_nls); + kfree(full_path); +out: if (rc != 0) { kfree(target_path); target_path = ERR_PTR(rc); } - kfree(full_path); -out: FreeXid(xid); nd_set_link(nd, target_path); return NULL; diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index e0cbd16f6dc9..1c341304621f 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -28,7 +28,7 @@ #define FSCACHE_MAX_THREADS 32 /* - * fsc-cache.c + * cache.c */ extern struct list_head fscache_cache_list; extern struct rw_semaphore fscache_addremove_sem; @@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object( struct fscache_cookie *); /* - * fsc-cookie.c + * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; @@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *); extern void __fscache_cookie_put(struct fscache_cookie *); /* - * fsc-fsdef.c + * fsdef.c */ extern struct fscache_cookie fscache_fsdef_index; extern struct fscache_cookie_def fscache_fsdef_netfs_def; /* - * fsc-histogram.c + * histogram.c */ #ifdef CONFIG_FSCACHE_HISTOGRAM extern atomic_t fscache_obj_instantiate_histogram[HZ]; @@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops; #endif /* - * fsc-main.c + * main.c */ extern unsigned fscache_defer_lookup; extern unsigned fscache_defer_create; @@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); /* - * fsc-object.c + * object.c */ extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); /* - * fsc-operation.c + * operation.c */ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); @@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); /* - * fsc-proc.c + * proc.c */ #ifdef CONFIG_PROC_FS extern int __init fscache_proc_init(void); @@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void); #endif /* - * fsc-stats.c + * stats.c */ #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; diff --git a/fs/inode.c b/fs/inode.c index 0571983755dc..bca0c618fdb3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -219,6 +219,7 @@ static struct inode *alloc_inode(struct super_block *sb) void destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); + ima_inode_free(inode); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); @@ -1053,13 +1054,22 @@ int insert_inode_locked(struct inode *inode) struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; spin_lock(&inode_lock); - old = find_inode_fast(sb, head, ino); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_ino != ino) + continue; + if (old->i_sb != sb) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0; @@ -1081,14 +1091,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, { struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; + spin_lock(&inode_lock); - old = find_inode(sb, head, test, data); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_sb != sb) + continue; + if (!test(old, data)) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 06560c520f49..618e21c0b7a3 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -241,7 +241,7 @@ write_out_data: spin_lock(&journal->j_list_lock); } /* Someone already cleaned up the buffer? */ - if (!buffer_jbd(bh) + if (!buffer_jbd(bh) || bh2jh(bh) != jh || jh->b_transaction != commit_transaction || jh->b_jlist != BJ_SyncData) { jbd_unlock_bh_state(bh); @@ -478,7 +478,9 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); continue; } - if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { + if (buffer_jbd(bh) && bh2jh(bh) == jh && + jh->b_transaction == commit_transaction && + jh->b_jlist == BJ_Locked) { __journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index c32b4a1ad6cf..a0244740b75a 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb return; filebad: - mutex_lock(&c->erase_free_sem); - spin_lock(&c->erase_completion_lock); - /* Stick it on a list (any list) so erase_failed can take it - right off again. Silly, but shouldn't happen often. */ - list_move(&jeb->list, &c->erasing_list); - spin_unlock(&c->erase_completion_lock); - mutex_unlock(&c->erase_free_sem); jffs2_erase_failed(c, jeb, bad_offset); return; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 370b190a09d1..89f98e9a024b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1943,7 +1943,8 @@ int nfs_permission(struct inode *inode, int mask) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && (mask & MAY_OPEN)) + && (mask & MAY_OPEN) + && !(mask & MAY_EXEC)) goto out; break; case S_IFDIR: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4d242680299..4674f8092da8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { - switch (task->tk_status) { - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_CB_PATH_DOWN: - nfs4_schedule_state_recovery(clp); - } + /* Unless we're shutting down, schedule state recovery! */ + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + nfs4_schedule_state_recovery(clp); return; } spin_lock(&clp->cl_lock); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index d9ef602fbc5a..e3ed5908820b 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -129,7 +129,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static const match_table_t tokens __initconst = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6c68ffd6b4bb..b660435978d2 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1015,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); if (host_err >= 0) { + *cnt = host_err; nfsdstats.io_write += host_err; fsnotify_modify(file->f_path.dentry); } @@ -1060,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) { + if (host_err >= 0) err = 0; - *cnt = host_err; - } else + else err = nfserrno(host_err); out: return err; diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index e90b60dfced9..300f1cdfa862 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_sem; + goto out_header; /* skip hole */ ret = 0; continue; @@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_sem; + goto out_header; } } @@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); } + + out_header: brelse(header_bh); out_sem: diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 50ff3f2cdf24..d6759b92006f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); out_free: - while (--n > 0) + while (--n >= 0) vfree(kbufs[n]); kfree(kbufs[4]); return ret; diff --git a/fs/proc/base.c b/fs/proc/base.c index fb45615943c2..3326bbf9ab95 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1956,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-EINVAL); + struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 9bca39cf99ee..1afa4dd4cae2 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -12,20 +12,14 @@ static int loadavg_proc_show(struct seq_file *m, void *v) { - int a, b, c; - unsigned long seq; + unsigned long avnrun[3]; - do { - seq = read_seqbegin(&xtime_lock); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - } while (read_seqretry(&xtime_lock, seq)); + get_avenrun(avnrun, FIXED_1/200, 0); - seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", - LOAD_INT(a), LOAD_FRAC(a), - LOAD_INT(b), LOAD_FRAC(b), - LOAD_INT(c), LOAD_FRAC(c), + seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", + LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), + LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), + LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, task_active_pid_ns(current)->last_pid); return 0; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1215a4f50cd2..3567fb9e3fb1 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -448,13 +448,11 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { -#ifdef CONFIG_REISERFS_FS_XATTR if (REISERFS_SB(s)->xattr_root) { d_invalidate(REISERFS_SB(s)->xattr_root); dput(REISERFS_SB(s)->xattr_root); REISERFS_SB(s)->xattr_root = NULL; } -#endif if (REISERFS_SB(s)->priv_root) { d_invalidate(REISERFS_SB(s)->priv_root); dput(REISERFS_SB(s)->priv_root); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2237e10c7c7c..8e7deb0e6964 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -123,7 +123,9 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags) mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); xaroot = dget(REISERFS_SB(sb)->xattr_root); - if (!xaroot->d_inode) { + if (!xaroot) + xaroot = ERR_PTR(-ENODATA); + else if (!xaroot->d_inode) { int err = -ENODATA; if (xattr_may_create(flags)) err = xattr_mkdir(privroot->d_inode, xaroot, 0700); @@ -685,20 +687,6 @@ out: return err; } -/* Actual operations that are exported to VFS-land */ -struct xattr_handler *reiserfs_xattr_handlers[] = { - &reiserfs_xattr_user_handler, - &reiserfs_xattr_trusted_handler, -#ifdef CONFIG_REISERFS_FS_SECURITY - &reiserfs_xattr_security_handler, -#endif -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - &reiserfs_posix_acl_access_handler, - &reiserfs_posix_acl_default_handler, -#endif - NULL -}; - /* * In order to implement different sets of xattr operations for each xattr * prefix with the generic xattr API, a filesystem should create a @@ -883,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return generic_permission(inode, mask, NULL); - else - return generic_permission(inode, mask, reiserfs_check_acl); -} - static int create_privroot(struct dentry *dentry) { int err; @@ -922,6 +893,28 @@ static int create_privroot(struct dentry *dentry) return 0; } +#else +int __init reiserfs_xattr_register_handlers(void) { return 0; } +void reiserfs_xattr_unregister_handlers(void) {} +static int create_privroot(struct dentry *dentry) { return 0; } +#endif + +/* Actual operations that are exported to VFS-land */ +struct xattr_handler *reiserfs_xattr_handlers[] = { +#ifdef CONFIG_REISERFS_FS_XATTR + &reiserfs_xattr_user_handler, + &reiserfs_xattr_trusted_handler, +#endif +#ifdef CONFIG_REISERFS_FS_SECURITY + &reiserfs_xattr_security_handler, +#endif +#ifdef CONFIG_REISERFS_FS_POSIX_ACL + &reiserfs_posix_acl_access_handler, + &reiserfs_posix_acl_default_handler, +#endif + NULL +}; + static int xattr_mount_check(struct super_block *s) { /* We need generation numbers to ensure that the oid mapping is correct @@ -941,10 +934,24 @@ static int xattr_mount_check(struct super_block *s) return 0; } -#else -int __init reiserfs_xattr_register_handlers(void) { return 0; } -void reiserfs_xattr_unregister_handlers(void) {} +int reiserfs_permission(struct inode *inode, int mask) +{ + /* + * We don't do permission checks on the internal objects. + * Permissions are determined by the "owning" object. + */ + if (IS_PRIVATE(inode)) + return 0; + +#ifdef CONFIG_REISERFS_FS_XATTR + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) != STAT_DATA_V1) + return generic_permission(inode, mask, reiserfs_check_acl); #endif + return generic_permission(inode, mask, NULL); +} /* This will catch lookups from the fs root to .reiserfs_priv */ static int @@ -992,7 +999,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) int err = 0; struct dentry *privroot = REISERFS_SB(s)->priv_root; -#ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; @@ -1023,14 +1029,11 @@ error: clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); } -#endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ -#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; else -#endif s->s_flags &= ~MS_POSIXACL; return err; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b1606e07b7a3..561a9c050cef 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -723,7 +723,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), mutex_unlock(&sysfs_workq_mutex); if (sysfs_workqueue == NULL) { - sysfs_workqueue = create_workqueue("sysfsd"); + sysfs_workqueue = create_singlethread_workqueue("sysfsd"); if (sysfs_workqueue == NULL) { module_put(owner); return -ENOMEM; diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index af6843c7ee4b..179cbd630f69 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); static inline int kmem_shake_allow(gfp_t gfp_mask) { - return (gfp_mask & __GFP_WAIT) != 0; + return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); } #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e6d839bddbf0..7465f9ee125f 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -347,13 +347,15 @@ xfs_swap_extents( error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); out: kmem_free(tempifp); return error; +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + goto out; + out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8379e3bca26c..cbd451bb4848 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -160,7 +160,7 @@ xfs_growfs_data_private( nagcount = new + (nb_mod != 0); if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { nagcount--; - nb = nagcount * mp->m_sb.sb_agblocks; + nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; if (nb < mp->m_sb.sb_dblocks) return XFS_ERROR(EINVAL); } diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf416..fe36accd4328 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -8,3 +8,4 @@ header-y += mtd/ header-y += rdma/ header-y += video/ header-y += drm/ +header-y += xen/ diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index dbd6150763e9..fc218444e315 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -42,7 +42,7 @@ typedef struct #define local_cmpxchg(l, o, n) atomic_long_cmpxchg((&(l)->a), (o), (n)) #define local_xchg(l, n) atomic_long_xchg((&(l)->a), (n)) -#define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u)) +#define local_add_unless(l, _a, u) atomic_long_add_unless((&(l)->a), (_a), (u)) #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a) /* Non-atomic variants, ie. preemption disabled and won't be touched diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70aba..e410f602cab1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/drm/drmP.h b/include/drm/drmP.h index c8c422151431..b84d8ae35e6f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1519,6 +1519,30 @@ static __inline__ void *drm_calloc(size_t nmemb, size_t size, int area) { return kcalloc(nmemb, size, GFP_KERNEL); } + +static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) +{ + u8 *addr; + + if (size <= PAGE_SIZE) + return kcalloc(nmemb, size, GFP_KERNEL); + + addr = vmalloc(nmemb * size); + if (!addr) + return NULL; + + memset(addr, 0, nmemb * size); + + return addr; +} + +static __inline void drm_free_large(void *ptr) +{ + if (!is_vmalloc_addr(ptr)) + return kfree(ptr); + + vfree(ptr); +} #else extern void *drm_alloc(size_t size, int area); extern void drm_free(void *pt, size_t size, int area); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 3c1924c010e8..7300fb866767 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -471,6 +471,9 @@ struct drm_connector { u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY]; uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY]; + /* requested DPMS state */ + int dpms; + void *helper_private; uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index ec073d8288d9..6769ff6c1bc0 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); + extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890ee3c7..51b4b0a5ce8c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_realmode_flags; -int acpi_register_gsi (u32 gsi, int triggering, int polarity); +int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 51e6e54b2aa1..9b93cafa82a0 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -28,7 +28,7 @@ struct amba_id { struct amba_driver { struct device_driver drv; - int (*probe)(struct amba_device *, void *); + int (*probe)(struct amba_device *, struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); int (*suspend)(struct amba_device *, pm_message_t); diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac5..64a982ea5d5f 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -159,6 +159,7 @@ #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) #ifndef __ASSEMBLY__ +struct amba_device; /* in uncompress this is included but amba/bus.h is not */ struct amba_pl010_data { void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); }; diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index 63265852b7d1..7b09c8348fd3 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -14,13 +14,12 @@ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H +#include <linux/types.h> #ifdef __KERNEL__ #include <linux/fs.h> #include <linux/limits.h> -#include <linux/types.h> #include <linux/ioctl.h> #else -#include <asm/types.h> #include <sys/ioctl.h> #endif /* __KERNEL__ */ diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3c..af931ee43dd8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9f315382610b..c5ac87ca7bc6 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); @@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, return true; } +static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + cpumask_clear(*mask); + return true; +} + +static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + cpumask_clear(*mask); + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } diff --git a/include/linux/cred.h b/include/linux/cred.h index 3282ee4318e7..4fa999696310 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -13,6 +13,7 @@ #define _LINUX_CRED_H #include <linux/capability.h> +#include <linux/init.h> #include <linux/key.h> #include <asm/atomic.h> diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 28d53cb7b5a2..171ad8aedc83 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern void dma_debug_init(u32 num_entries); +extern int dma_debug_resize_entries(u32 num_entries); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries) { } +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc342cda..10ff5c498824 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -108,6 +108,7 @@ struct irte { }; #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; +extern int intr_remapping_supported(void); extern int enable_intr_remapping(int); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); @@ -157,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) } #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) +#define disable_intr_remapping() (0) +#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) #endif diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f9..34956c8fdebf 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,8 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +40,10 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h index 05a80c44513c..1587b7dec505 100644 --- a/include/linux/i7300_idle.h +++ b/include/linux/i7300_idle.h @@ -16,35 +16,33 @@ struct fbd_ioat { unsigned int vendor; unsigned int ioat_dev; + unsigned int enabled; }; /* * The i5000 chip-set has the same hooks as the i7300 - * but support is disabled by default because this driver - * has not been validated on that platform. + * but it is not enabled by default and must be manually + * manually enabled with "forceload=1" because it is + * only lightly validated. */ -#define SUPPORT_I5000 0 static const struct fbd_ioat fbd_ioat_list[] = { - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, -#if SUPPORT_I5000 - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT}, -#endif + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1}, + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0}, {0, 0} }; /* table of devices that work with this driver */ static const struct pci_device_id pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, -#if SUPPORT_I5000 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, -#endif { } /* Terminating entry */ }; /* Check for known platforms with I/O-AT */ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, - struct pci_dev **ioat_dev) + struct pci_dev **ioat_dev, + int enable_all) { int i; struct pci_dev *memdev, *dmadev; @@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { if (dmadev->vendor == fbd_ioat_list[i].vendor && dmadev->device == fbd_ioat_list[i].ioat_dev) { + if (!(fbd_ioat_list[i].enabled || enable_all)) + continue; if (fbd_dev) *fbd_dev = memdev; if (ioat_dev) diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fffb078f..9fed365a598b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1109,7 +1109,7 @@ void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); -int ide_busy_sleep(ide_hwif_t *, unsigned long, int); +int ide_busy_sleep(ide_drive_t *, unsigned long, int); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); diff --git a/include/linux/input.h b/include/linux/input.h index 0e6ff5de3588..6fed4f6a9c9e 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -656,6 +656,7 @@ struct input_absinfo { #define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ +#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 91bb76f44f14..ff374ceface0 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,6 +566,6 @@ struct irq_desc; extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index b7cbeed972e4..eedbb8e5e0cc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -117,7 +117,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, + int (*set_affinity)(unsigned int irq, const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); @@ -187,7 +187,7 @@ struct irq_desc { spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; - unsigned int cpu; + unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -201,26 +201,23 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); + struct irq_desc *desc, int node); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; -#else /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); -#endif /* CONFIG_SPARSE_IRQ */ - -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +#endif -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); +#ifdef CONFIG_NUMA_IRQ_DESC +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); #else +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ return desc; -#endif } +#endif + +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* * Migration helpers for obsolete names, they will go away: @@ -386,7 +383,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ -extern unsigned int create_irq_nr(unsigned int irq_want); +extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); @@ -424,47 +421,48 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #ifdef CONFIG_SMP /** - * init_alloc_desc_masks - allocate cpumasks for irq_desc + * alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). - * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { - int node; - +#ifdef CONFIG_CPUMASK_OFFSTACK if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ alloc_bootmem_cpumask_var(&desc->pending_mask); - cpumask_clear(desc->pending_mask); #endif return true; } - node = cpu_to_node(cpu); - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { free_cpumask_var(desc->affinity); return false; } - cpumask_clear(desc->pending_mask); +#endif #endif return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +} + /** * init_copy_desc_masks - copy cpumasks for irq_desc * @old_desc: pointer to old irq_desc struct @@ -478,7 +476,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -#ifdef CONFIG_CPUMASKS_OFFSTACK +#ifdef CONFIG_CPUMASK_OFFSTACK cpumask_copy(new_desc->affinity, old_desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -499,12 +497,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ +} + static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 009eabd3c21c..9772d6cbfc82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1032,8 +1032,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 186ec6ab334d..a47c879e1304 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1097,6 +1097,32 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #define pfn_valid_within(pfn) (1) #endif +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +/* + * pfn_valid() is meant to be able to tell if a given PFN has valid memmap + * associated with it or not. In FLATMEM, it is expected that holes always + * have valid memmap as long as there is valid PFNs either side of the hole. + * In SPARSEMEM, it is assumed that a valid section has a memmap for the + * entire section. + * + * However, an ARM, and maybe other embedded architectures in the future + * free memmap backing holes to save memory on the assumption the memmap is + * never used. The page_zone linkages are then broken even though pfn_valid() + * returns true. A walker of the full memmap must then do this additional + * check to ensure the memmap they are looking at is sane by making sure + * the zone and PFN linkages are still valid. This is expensive, but walkers + * of the full memmap are extremely rare. + */ +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone); +#else +static inline int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab8..878cab4f5fcc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -150,5 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a630..0e2e100c44a2 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -1,6 +1,7 @@ #ifndef __NET_DROPMON_H #define __NET_DROPMON_H +#include <linux/types.h> #include <linux/netlink.h> struct net_dm_drop_point { diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 3066789b972a..b2f384d42611 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -35,6 +35,9 @@ enum tcp_conntrack { /* Has unacknowledged data */ #define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 +/* The field td_maxack has been set */ +#define IP_CT_TCP_FLAG_MAXACK_SET 0x20 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; @@ -46,6 +49,7 @@ struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxwin; /* max(win) */ + u_int32_t td_maxack; /* max of ack */ u_int8_t td_scale; /* window scale factor */ u_int8_t flags; /* per direction options */ }; diff --git a/include/linux/parport.h b/include/linux/parport.h index e1f83c5065c5..38a423ed3c01 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -324,6 +324,10 @@ struct parport { int spintime; atomic_t ref_count; + unsigned long devflags; +#define PARPORT_DEVPROC_REGISTERED 0 + struct pardevice *proc_device; /* Currently register proc device */ + struct list_head full_list; struct parport *slaves[3]; }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 06ba90c211a5..0f71812d67d3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1406,7 +1406,7 @@ #define PCI_DEVICE_ID_VIA_82C598_1 0x8598 #define PCI_DEVICE_ID_VIA_838X_1 0xB188 #define PCI_DEVICE_ID_VIA_83_87XX_1 0xB198 -#define PCI_DEVICE_ID_VIA_C409_IDE 0XC409 +#define PCI_DEVICE_ID_VIA_VX855_IDE 0xC409 #define PCI_DEVICE_ID_VIA_ANON 0xFFFF #define PCI_VENDOR_ID_SIEMENS 0x110A diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..5710f43bbc9e 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5312b9..5a5153806c42 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index cdedc01036e4..99928dce37ea 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,6 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_permission(struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) @@ -50,7 +51,6 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int reiserfs_removexattr(struct dentry *dentry, const char *name); -int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); @@ -117,8 +117,6 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode) #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_permission NULL - static inline void reiserfs_init_xattr_rwsem(struct inode *inode) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 1ed4ef520680..d1399660b776 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include <linux/proportions.h> #include <linux/seccomp.h> #include <linux/rcupdate.h> +#include <linux/rculist.h> #include <linux/rtmutex.h> #include <linux/time.h> @@ -116,6 +117,7 @@ struct bts_context; * 11 bit fractions. */ extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ @@ -135,8 +137,8 @@ DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); -extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern void calc_global_load(void); extern unsigned long get_parent_ip(unsigned long addr); @@ -838,7 +840,17 @@ struct sched_group { */ u32 reciprocal_cpu_power; - unsigned long cpumask[]; + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_group' in kernel/sched.c) + */ + unsigned long cpumask[0]; }; static inline struct cpumask *sched_group_cpus(struct sched_group *sg) @@ -924,8 +936,17 @@ struct sched_domain { char *name; #endif - /* span of all CPUs in this domain */ - unsigned long span[]; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_domain' in kernel/sched.c) + */ + unsigned long span[0]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) @@ -2007,7 +2028,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2046,8 +2068,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa53..c7552836bd95 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 938234c4a996..d4841ed8215b 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) #define __raw_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ # define __raw_spin_lock(lock) do { (void)(lock); } while (0) +# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) # define __raw_spin_unlock(lock) do { (void)(lock); } while (0) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 62d81435347a..d476aad3ff57 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page, return 0; } +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ac9ff54f7cb3..cb1a6631b8f4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, + dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56b..a8cc4e13434c 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index c7aa154f4bfc..eb96603d92db 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started - * running. @trace is the value returned by tracehook_prepare_clone(). + * Called after a child is set up, but before it has been started running. * This is not a good place to block, because the child has not started * yet. Suspend the child here if desired, and then block in * tracehook_report_clone_complete(). This must prevent the child from @@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child, * * Called with no locks held, but the child cannot run until this returns. */ -static inline void tracehook_report_clone(int trace, struct pt_regs *regs, +static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { + if (unlikely(task_ptrace(child))) { /* - * The child starts up with an immediate SIGSTOP. + * It doesn't matter who attached/attaching to this + * task, the pending SIGSTOP is right in any case. */ sigaddset(&child->pending.signal, SIGSTOP); set_tsk_thread_flag(child, TIF_SIGPENDING); diff --git a/include/linux/wait.h b/include/linux/wait.h index bc024632f365..6788e1a4d4ca 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index c9184f756cad..68a8d873bbd9 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -680,7 +680,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->roles & FC_PORT_ROLE_FCP_TARGET) result = 0; else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; else result = DID_NO_CONNECT << 16; break; @@ -688,7 +688,7 @@ fc_remote_port_chkready(struct fc_rport *rport) if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT) result = DID_TRANSPORT_FAILFAST << 16; else - result = DID_TRANSPORT_DISRUPTED << 16; + result = DID_IMM_RETRY << 16; break; default: result = DID_NO_CONNECT << 16; diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 000000000000..4e65c16a445b --- /dev/null +++ b/include/xen/Kbuild @@ -0,0 +1 @@ +header-y += evtchn.h diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc0363..e68d59a90ca8 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Determine the IRQ which is bound to an event channel */ +unsigned irq_from_evtchn(unsigned int evtchn); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 000000000000..14e833ee4e0b --- /dev/null +++ b/include/xen/evtchn.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f0..e8b6519d47e9 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -57,4 +57,7 @@ struct xen_feature_info { /* Declares the features reported by XENVER_get_features. */ #include "features.h" +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + #endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844d..b9763badbd77 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -91,8 +91,7 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; diff --git a/init/Kconfig b/init/Kconfig index 7be4d3836745..d4e9671347ee 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -308,7 +308,7 @@ menu "RCU Subsystem" choice prompt "RCU Implementation" - default CLASSIC_RCU + default TREE_RCU config CLASSIC_RCU bool "Classic RCU" diff --git a/init/main.c b/init/main.c index 7c6a652d3d78..bb7dc57eee36 100644 --- a/init/main.c +++ b/init/main.c @@ -566,8 +566,7 @@ asmlinkage void __init start_kernel(void) tick_init(); boot_cpu_init(); page_address_init(); - printk(KERN_NOTICE); - printk(linux_banner); + printk(KERN_NOTICE "%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); diff --git a/ipc/sem.c b/ipc/sem.c index 16a2189e96f9..87c2b641fd7b 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk) int i; rcu_read_lock(); - un = list_entry(rcu_dereference(ulp->list_proc.next), - struct sem_undo, list_proc); + un = list_entry_rcu(ulp->list_proc.next, + struct sem_undo, list_proc); if (&un->list_proc == &ulp->list_proc) semid = -1; else diff --git a/ipc/shm.c b/ipc/shm.c index faa46da99ebe..425971600485 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -969,10 +969,13 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *next; + struct vm_area_struct *vma; unsigned long addr = (unsigned long)shmaddr; - loff_t size = 0; int retval = -EINVAL; +#ifdef CONFIG_MMU + loff_t size = 0; + struct vm_area_struct *next; +#endif if (addr & ~PAGE_MASK) return retval; diff --git a/kernel/async.c b/kernel/async.c index 968ef9457d4e..27235f5de198 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -92,19 +92,18 @@ extern int initcall_debug; static async_cookie_t __lowest_in_progress(struct list_head *running) { struct async_entry *entry; + if (!list_empty(running)) { entry = list_first_entry(running, struct async_entry, list); return entry->cookie; - } else if (!list_empty(&async_pending)) { - entry = list_first_entry(&async_pending, - struct async_entry, list); - return entry->cookie; - } else { - /* nothing in progress... next_cookie is "infinity" */ - return next_cookie; } + list_for_each_entry(entry, &async_pending, list) + if (entry->running == running) + return entry->cookie; + + return next_cookie; /* "infinity" value */ } static async_cookie_t lowest_in_progress(struct list_head *running) diff --git a/kernel/compat.c b/kernel/compat.c index 42d56544460f..f6c204f07ea6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, } +asmlinkage long +compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* compat_time_t is a 32 bit "long" and needs to get converted. */ diff --git a/kernel/fork.c b/kernel/fork.c index 711468f3db2a..5449efbc6427 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1408,7 +1408,7 @@ long do_fork(unsigned long clone_flags, } audit_finish_fork(p); - tracehook_report_clone(trace, regs, clone_flags, nr, p); + tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to diff --git a/kernel/futex.c b/kernel/futex.c index eef8cd26b5e5..80b5ce716596 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -19,6 +19,10 @@ * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> * + * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com> + * Copyright (C) IBM Corporation, 2009 + * Thanks to Thomas Gleixner for conceptual design and careful reviews. + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -96,8 +100,8 @@ struct futex_pi_state { */ struct futex_q { struct plist_node list; - /* There can only be a single waiter */ - wait_queue_head_t waiter; + /* Waiter reference */ + struct task_struct *task; /* Which hash list lock to use: */ spinlock_t *lock_ptr; @@ -107,7 +111,9 @@ struct futex_q { /* Optional priority inheritance state: */ struct futex_pi_state *pi_state; - struct task_struct *task; + + /* rt_waiter storage for requeue_pi: */ + struct rt_mutex_waiter *rt_waiter; /* Bitset for the optional bitmasked wakeup */ u32 bitset; @@ -193,6 +199,7 @@ static void drop_futex_key_refs(union futex_key *key) * @uaddr: virtual address of the futex * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) * * Returns a negative error code or 0 * The key words are stored in *key on success. @@ -203,7 +210,8 @@ static void drop_futex_key_refs(union futex_key *key) * * lock_page() might sleep, the caller should not hold a spinlock. */ -static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) +static int +get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; @@ -226,7 +234,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) * but access_ok() should be faster than find_vma() */ if (!fshared) { - if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) + if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) return -EFAULT; key->private.mm = mm; key->private.address = address; @@ -235,7 +243,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) } again: - err = get_user_pages_fast(address, 1, 0, &page); + err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page); if (err < 0) return err; @@ -276,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key) drop_futex_key_refs(key); } +/** + * futex_top_waiter() - Return the highest priority waiter on a futex + * @hb: the hash bucket the futex_q's reside in + * @key: the futex key (to distinguish it from other futex futex_q's) + * + * Must be called with the hb lock held. + */ +static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, + union futex_key *key) +{ + struct futex_q *this; + + plist_for_each_entry(this, &hb->chain, list) { + if (match_futex(&this->key, key)) + return this; + } + return NULL; +} + static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) { u32 curval; @@ -537,28 +564,160 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return 0; } +/** + * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex + * @uaddr: the pi futex user address + * @hb: the pi futex hash bucket + * @key: the futex key associated with uaddr and hb + * @ps: the pi_state pointer where we store the result of the + * lookup + * @task: the task to perform the atomic lock work for. This will + * be "current" except in the case of requeue pi. + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Returns: + * 0 - ready to wait + * 1 - acquired the lock + * <0 - error + * + * The hb->lock and futex_key refs shall be held by the caller. + */ +static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, + union futex_key *key, + struct futex_pi_state **ps, + struct task_struct *task, int set_waiters) +{ + int lock_taken, ret, ownerdied = 0; + u32 uval, newval, curval; + +retry: + ret = lock_taken = 0; + + /* + * To avoid races, we attempt to take the lock here again + * (by doing a 0 -> TID atomic cmpxchg), while holding all + * the locks. It will most likely not succeed. + */ + newval = task_pid_vnr(task); + if (set_waiters) + newval |= FUTEX_WAITERS; + + curval = cmpxchg_futex_value_locked(uaddr, 0, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + + /* + * Detect deadlocks. + */ + if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) + return -EDEADLK; + + /* + * Surprise - we got the lock. Just return to userspace: + */ + if (unlikely(!curval)) + return 1; + + uval = curval; + + /* + * Set the FUTEX_WAITERS flag, so the owner will know it has someone + * to wake at the next unlock. + */ + newval = curval | FUTEX_WAITERS; + + /* + * There are two cases, where a futex might have no owner (the + * owner TID is 0): OWNER_DIED. We take over the futex in this + * case. We also do an unconditional take over, when the owner + * of the futex died. + * + * This is safe as we are protected by the hash bucket lock ! + */ + if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { + /* Keep the OWNER_DIED bit */ + newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); + ownerdied = 0; + lock_taken = 1; + } + + curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + if (unlikely(curval != uval)) + goto retry; + + /* + * We took the lock due to owner died take over. + */ + if (unlikely(lock_taken)) + return 1; + + /* + * We dont have the lock. Look up the PI state (or create it if + * we are the first waiter): + */ + ret = lookup_pi_state(uval, hb, key, ps); + + if (unlikely(ret)) { + switch (ret) { + case -ESRCH: + /* + * No owner found for this futex. Check if the + * OWNER_DIED bit is set to figure out whether + * this is a robust futex or not. + */ + if (get_futex_value_locked(&curval, uaddr)) + return -EFAULT; + + /* + * We simply start over in case of a robust + * futex. The code above will take the futex + * and return happy. + */ + if (curval & FUTEX_OWNER_DIED) { + ownerdied = 1; + goto retry; + } + default: + break; + } + } + + return ret; +} + /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. */ static void wake_futex(struct futex_q *q) { - plist_del(&q->list, &q->list.plist); + struct task_struct *p = q->task; + /* - * The lock in wake_up_all() is a crucial memory barrier after the - * plist_del() and also before assigning to q->lock_ptr. + * We set q->lock_ptr = NULL _before_ we wake up the task. If + * a non futex wake up happens on another CPU then the task + * might exit and p would dereference a non existing task + * struct. Prevent this by holding a reference on p across the + * wake up. */ - wake_up(&q->waiter); + get_task_struct(p); + + plist_del(&q->list, &q->list.plist); /* - * The waiting task can free the futex_q as soon as this is written, - * without taking any locks. This must come last. - * - * A memory barrier is required here to prevent the following store to - * lock_ptr from getting ahead of the wakeup. Clearing the lock at the - * end of wake_up() does not prevent this store from moving. + * The waiting task can free the futex_q as soon as + * q->lock_ptr = NULL is written, without taking any locks. A + * memory barrier is required here to prevent the following + * store to lock_ptr from getting ahead of the plist_del. */ smp_wmb(); q->lock_ptr = NULL; + + wake_up_state(p, TASK_NORMAL); + put_task_struct(p); } static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) @@ -677,7 +836,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); if (unlikely(ret != 0)) goto out; @@ -687,7 +846,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key)) { - if (this->pi_state) { + if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } @@ -723,10 +882,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, int ret, op_ret; retry: - ret = get_futex_key(uaddr1, fshared, &key1); + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out_put_key1; @@ -800,24 +959,185 @@ out: return ret; } -/* - * Requeue all waiters hashed on one physical page to another - * physical page. +/** + * requeue_futex() - Requeue a futex_q from one hb to another + * @q: the futex_q to requeue + * @hb1: the source hash_bucket + * @hb2: the target hash_bucket + * @key2: the new key for the requeued futex_q + */ +static inline +void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key2) +{ + + /* + * If key1 and key2 hash to the same bucket, no need to + * requeue. + */ + if (likely(&hb1->chain != &hb2->chain)) { + plist_del(&q->list, &hb1->chain); + plist_add(&q->list, &hb2->chain); + q->lock_ptr = &hb2->lock; +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.lock = &hb2->lock; +#endif + } + get_futex_key_refs(key2); + q->key = *key2; +} + +/** + * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue + * q: the futex_q + * key: the key of the requeue target futex + * + * During futex_requeue, with requeue_pi=1, it is possible to acquire the + * target futex if it is uncontended or via a lock steal. Set the futex_q key + * to the requeue target futex so the waiter can detect the wakeup on the right + * futex, but remove it from the hb and NULL the rt_waiter so it can detect + * atomic lock acquisition. Must be called with the q->lock_ptr held. + */ +static inline +void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) +{ + drop_futex_key_refs(&q->key); + get_futex_key_refs(key); + q->key = *key; + + WARN_ON(plist_node_empty(&q->list)); + plist_del(&q->list, &q->list.plist); + + WARN_ON(!q->rt_waiter); + q->rt_waiter = NULL; + + wake_up_state(q->task, TASK_NORMAL); +} + +/** + * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter + * @pifutex: the user address of the to futex + * @hb1: the from futex hash bucket, must be locked by the caller + * @hb2: the to futex hash bucket, must be locked by the caller + * @key1: the from futex key + * @key2: the to futex key + * @ps: address to store the pi_state pointer + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) + * + * Try and get the lock on behalf of the top waiter if we can do it atomically. + * Wake the top waiter if we succeed. If the caller specified set_waiters, + * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. + * hb1 and hb2 must be held by the caller. + * + * Returns: + * 0 - failed to acquire the lock atomicly + * 1 - acquired the lock + * <0 - error + */ +static int futex_proxy_trylock_atomic(u32 __user *pifutex, + struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, + union futex_key *key1, union futex_key *key2, + struct futex_pi_state **ps, int set_waiters) +{ + struct futex_q *top_waiter = NULL; + u32 curval; + int ret; + + if (get_futex_value_locked(&curval, pifutex)) + return -EFAULT; + + /* + * Find the top_waiter and determine if there are additional waiters. + * If the caller intends to requeue more than 1 waiter to pifutex, + * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, + * as we have means to handle the possible fault. If not, don't set + * the bit unecessarily as it will force the subsequent unlock to enter + * the kernel. + */ + top_waiter = futex_top_waiter(hb1, key1); + + /* There are no waiters, nothing for us to do. */ + if (!top_waiter) + return 0; + + /* + * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in + * the contended case or if set_waiters is 1. The pi_state is returned + * in ps in contended cases. + */ + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, + set_waiters); + if (ret == 1) + requeue_pi_wake_futex(top_waiter, key2); + + return ret; +} + +/** + * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 + * uaddr1: source futex user address + * uaddr2: target futex user address + * nr_wake: number of waiters to wake (must be 1 for requeue_pi) + * nr_requeue: number of waiters to requeue (0-INT_MAX) + * requeue_pi: if we are attempting to requeue from a non-pi futex to a + * pi futex (pi to pi requeue is not supported) + * + * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire + * uaddr2 atomically on behalf of the top waiter. + * + * Returns: + * >=0 - on success, the number of tasks requeued or woken + * <0 - on error */ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) + int nr_wake, int nr_requeue, u32 *cmpval, + int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + int drop_count = 0, task_count = 0, ret; + struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - int ret, drop_count = 0; + u32 curval2; + + if (requeue_pi) { + /* + * requeue_pi requires a pi_state, try to allocate it now + * without any locks in case it fails. + */ + if (refill_pi_state_cache()) + return -ENOMEM; + /* + * requeue_pi must wake as many tasks as it can, up to nr_wake + * + nr_requeue, since it acquires the rt_mutex prior to + * returning to userspace, so as to not leave the rt_mutex with + * waiters and no owner. However, second and third wake-ups + * cannot be predicted as they involve race conditions with the + * first wake and a fault while looking up the pi_state. Both + * pthread_cond_signal() and pthread_cond_broadcast() should + * use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; + } retry: - ret = get_futex_key(uaddr1, fshared, &key1); + if (pi_state != NULL) { + /* + * We will have to lookup the pi_state again, so free this one + * to keep the accounting correct. + */ + free_pi_state(pi_state); + pi_state = NULL; + } + + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); if (unlikely(ret != 0)) goto out; - ret = get_futex_key(uaddr2, fshared, &key2); + ret = get_futex_key(uaddr2, fshared, &key2, + requeue_pi ? VERIFY_WRITE : VERIFY_READ); if (unlikely(ret != 0)) goto out_put_key1; @@ -852,32 +1172,99 @@ retry_private: } } + if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + /* + * Attempt to acquire uaddr2 and wake the top waiter. If we + * intend to requeue waiters, force setting the FUTEX_WAITERS + * bit. We force this here where we are able to easily handle + * faults rather in the requeue loop below. + */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, + &key2, &pi_state, nr_requeue); + + /* + * At this point the top_waiter has either taken uaddr2 or is + * waiting on it. If the former, then the pi_state will not + * exist yet, look it up one more time to ensure we have a + * reference to it. + */ + if (ret == 1) { + WARN_ON(pi_state); + task_count++; + ret = get_futex_value_locked(&curval2, uaddr2); + if (!ret) + ret = lookup_pi_state(curval2, hb2, &key2, + &pi_state); + } + + switch (ret) { + case 0: + break; + case -EFAULT: + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + ret = get_user(curval2, uaddr2); + if (!ret) + goto retry; + goto out; + case -EAGAIN: + /* The owner was exiting, try again. */ + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + cond_resched(); + goto retry; + default: + goto out_unlock; + } + } + head1 = &hb1->chain; plist_for_each_entry_safe(this, next, head1, list) { - if (!match_futex (&this->key, &key1)) + if (task_count - nr_wake >= nr_requeue) + break; + + if (!match_futex(&this->key, &key1)) continue; - if (++ret <= nr_wake) { + + WARN_ON(!requeue_pi && this->rt_waiter); + WARN_ON(requeue_pi && !this->rt_waiter); + + /* + * Wake nr_wake waiters. For requeue_pi, if we acquired the + * lock, we already woke the top_waiter. If not, it will be + * woken by futex_unlock_pi(). + */ + if (++task_count <= nr_wake && !requeue_pi) { wake_futex(this); - } else { - /* - * If key1 and key2 hash to the same bucket, no need to - * requeue. - */ - if (likely(head1 != &hb2->chain)) { - plist_del(&this->list, &hb1->chain); - plist_add(&this->list, &hb2->chain); - this->lock_ptr = &hb2->lock; -#ifdef CONFIG_DEBUG_PI_LIST - this->list.plist.lock = &hb2->lock; -#endif - } - this->key = key2; - get_futex_key_refs(&key2); - drop_count++; + continue; + } - if (ret - nr_wake >= nr_requeue) - break; + /* + * Requeue nr_requeue waiters and possibly one more in the case + * of requeue_pi if we couldn't acquire the lock atomically. + */ + if (requeue_pi) { + /* Prepare the waiter to take the rt_mutex. */ + atomic_inc(&pi_state->refcount); + this->pi_state = pi_state; + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task, 1); + if (ret == 1) { + /* We got the lock. */ + requeue_pi_wake_futex(this, &key2); + continue; + } else if (ret) { + /* -EDEADLK */ + this->pi_state = NULL; + free_pi_state(pi_state); + goto out_unlock; + } } + requeue_futex(this, hb1, hb2, &key2); + drop_count++; } out_unlock: @@ -897,7 +1284,9 @@ out_put_keys: out_put_key1: put_futex_key(fshared, &key1); out: - return ret; + if (pi_state != NULL) + free_pi_state(pi_state); + return ret ? ret : task_count; } /* The key must be already stored in q->key. */ @@ -905,8 +1294,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) { struct futex_hash_bucket *hb; - init_waitqueue_head(&q->waiter); - get_futex_key_refs(&q->key); hb = hash_futex(&q->key); q->lock_ptr = &hb->lock; @@ -1117,35 +1504,149 @@ handle_fault: */ #define FLAGS_SHARED 0x01 #define FLAGS_CLOCKRT 0x02 +#define FLAGS_HAS_TIMEOUT 0x04 static long futex_wait_restart(struct restart_block *restart); -static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +/** + * fixup_owner() - Post lock pi_state and corner case management + * @uaddr: user address of the futex + * @fshared: whether the futex is shared (1) or not (0) + * @q: futex_q (contains pi_state and access to the rt_mutex) + * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) + * + * After attempting to lock an rt_mutex, this function is called to cleanup + * the pi_state owner as well as handle race conditions that may allow us to + * acquire the lock. Must be called with the hb lock held. + * + * Returns: + * 1 - success, lock taken + * 0 - success, lock not taken + * <0 - on error (-EFAULT) + */ +static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, + int locked) { - struct task_struct *curr = current; - struct restart_block *restart; - DECLARE_WAITQUEUE(wait, curr); - struct futex_hash_bucket *hb; - struct futex_q q; - u32 uval; - int ret; - struct hrtimer_sleeper t; - int rem = 0; + struct task_struct *owner; + int ret = 0; - if (!bitset) - return -EINVAL; + if (locked) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case: + */ + if (q->pi_state->owner != current) + ret = fixup_pi_state_owner(uaddr, q, current, fshared); + goto out; + } - q.pi_state = NULL; - q.bitset = bitset; -retry: - q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); - if (unlikely(ret != 0)) + /* + * Catch the rare case, where the lock was released when we were on the + * way back before we locked the hash bucket. + */ + if (q->pi_state->owner == current) { + /* + * Try to get the rt_mutex now. This might fail as some other + * task acquired the rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ + if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { + locked = 1; + goto out; + } + + /* + * pi_state is incorrect, some other task did a lock steal and + * we returned due to timeout or signal without taking the + * rt_mutex. Too late. We can access the rt_mutex_owner without + * locking, as the other task is now blocked on the hash bucket + * lock. Fix the state up. + */ + owner = rt_mutex_owner(&q->pi_state->pi_mutex); + ret = fixup_pi_state_owner(uaddr, q, owner, fshared); goto out; + } -retry_private: - hb = queue_lock(&q); + /* + * Paranoia check. If we did not take the lock, then we should not be + * the owner, nor the pending owner, of the rt_mutex. + */ + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) + printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " + "pi-state %p\n", ret, + q->pi_state->pi_mutex.owner, + q->pi_state->owner); + +out: + return ret ? ret : locked; +} + +/** + * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal + * @hb: the futex hash bucket, must be locked by the caller + * @q: the futex_q to queue up on + * @timeout: the prepared hrtimer_sleeper, or null for no timeout + */ +static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + struct hrtimer_sleeper *timeout) +{ + queue_me(q, hb); + + /* + * There might have been scheduling since the queue_me(), as we + * cannot hold a spinlock across the get_user() in case it + * faults, and we cannot just set TASK_INTERRUPTIBLE state when + * queueing ourselves into the futex hash. This code thus has to + * rely on the futex_wake() code removing us from hash when it + * wakes us up. + */ + set_current_state(TASK_INTERRUPTIBLE); + + /* Arm the timer */ + if (timeout) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + /* + * !plist_node_empty() is safe here without any lock. + * q.lock_ptr != 0 is not safe, because of ordering against wakeup. + */ + if (likely(!plist_node_empty(&q->list))) { + /* + * If the timer has already expired, current will already be + * flagged for rescheduling. Only call schedule if there + * is no timeout, or if it has yet to expire. + */ + if (!timeout || timeout->task) + schedule(); + } + __set_current_state(TASK_RUNNING); +} + +/** + * futex_wait_setup() - Prepare to wait on a futex + * @uaddr: the futex userspace address + * @val: the expected value + * @fshared: whether the futex is shared (1) or not (0) + * @q: the associated futex_q + * @hb: storage for hash_bucket pointer to be returned to caller + * + * Setup the futex_q and locate the hash_bucket. Get the futex value and + * compare it with the expected value. Handle atomic faults internally. + * Return with the hb lock held and a q.key reference on success, and unlocked + * with no q.key reference on failure. + * + * Returns: + * 0 - uaddr contains val and hb has been locked + * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked + */ +static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, + struct futex_q *q, struct futex_hash_bucket **hb) +{ + u32 uval; + int ret; /* * Access the page AFTER the hash-bucket is locked. @@ -1163,95 +1664,83 @@ retry_private: * A consequence is that futex_wait() can return zero and absorb * a wakeup when *uaddr != val on entry to the syscall. This is * rare, but normal. - * - * For shared futexes, we hold the mmap semaphore, so the mapping - * cannot have changed since we looked it up in get_futex_key. */ +retry: + q->key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); + if (unlikely(ret != 0)) + return ret; + +retry_private: + *hb = queue_lock(q); + ret = get_futex_value_locked(&uval, uaddr); - if (unlikely(ret)) { - queue_unlock(&q, hb); + if (ret) { + queue_unlock(q, *hb); ret = get_user(uval, uaddr); if (ret) - goto out_put_key; + goto out; if (!fshared) goto retry_private; - put_futex_key(fshared, &q.key); + put_futex_key(fshared, &q->key); goto retry; } - ret = -EWOULDBLOCK; - if (unlikely(uval != val)) { - queue_unlock(&q, hb); - goto out_put_key; - } - /* Only actually queue if *uaddr contained val. */ - queue_me(&q, hb); + if (uval != val) { + queue_unlock(q, *hb); + ret = -EWOULDBLOCK; + } - /* - * There might have been scheduling since the queue_me(), as we - * cannot hold a spinlock across the get_user() in case it - * faults, and we cannot just set TASK_INTERRUPTIBLE state when - * queueing ourselves into the futex hash. This code thus has to - * rely on the futex_wake() code removing us from hash when it - * wakes us up. - */ +out: + if (ret) + put_futex_key(fshared, &q->key); + return ret; +} - /* add_wait_queue is the barrier after __set_current_state. */ - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&q.waiter, &wait); - /* - * !plist_node_empty() is safe here without any lock. - * q.lock_ptr != 0 is not safe, because of ordering against wakeup. - */ - if (likely(!plist_node_empty(&q.list))) { - if (!abs_time) - schedule(); - else { - hrtimer_init_on_stack(&t.timer, - clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(&t, current); - hrtimer_set_expires_range_ns(&t.timer, *abs_time, - current->timer_slack_ns); - - hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&t.timer)) - t.task = NULL; +static int futex_wait(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct restart_block *restart; + struct futex_hash_bucket *hb; + struct futex_q q; + int ret; - /* - * the timer could have already expired, in which - * case current would be flagged for rescheduling. - * Don't bother calling schedule. - */ - if (likely(t.task)) - schedule(); + if (!bitset) + return -EINVAL; - hrtimer_cancel(&t.timer); + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = NULL; - /* Flag if a timeout occured */ - rem = (t.task == NULL); + if (abs_time) { + to = &timeout; - destroy_hrtimer_on_stack(&t.timer); - } + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); } - __set_current_state(TASK_RUNNING); - /* - * NOTE: we don't remove ourselves from the waitqueue because - * we are the only user of it. - */ + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out; + + /* queue_me and wait for wakeup, timeout, or a signal. */ + futex_wait_queue_me(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ ret = 0; if (!unqueue_me(&q)) goto out_put_key; ret = -ETIMEDOUT; - if (rem) + if (to && !to->task) goto out_put_key; /* @@ -1268,7 +1757,7 @@ retry_private: restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = 0; + restart->futex.flags = FLAGS_HAS_TIMEOUT; if (fshared) restart->futex.flags |= FLAGS_SHARED; @@ -1280,6 +1769,10 @@ retry_private: out_put_key: put_futex_key(fshared, &q.key); out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } return ret; } @@ -1288,13 +1781,16 @@ static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; int fshared = 0; - ktime_t t; + ktime_t t, *tp = NULL; - t.tv64 = restart->futex.time; + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { + t.tv64 = restart->futex.time; + tp = &t; + } restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) fshared = 1; - return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, + return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, restart->futex.bitset, restart->futex.flags & FLAGS_CLOCKRT); } @@ -1310,11 +1806,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; - struct task_struct *curr = current; struct futex_hash_bucket *hb; - u32 uval, newval, curval; + u32 uval; struct futex_q q; - int ret, lock_taken, ownerdied = 0; + int res, ret; if (refill_pi_state_cache()) return -ENOMEM; @@ -1328,90 +1823,25 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } q.pi_state = NULL; + q.rt_waiter = NULL; retry: q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); + ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; retry_private: hb = queue_lock(&q); -retry_locked: - ret = lock_taken = 0; - - /* - * To avoid races, we attempt to take the lock here again - * (by doing a 0 -> TID atomic cmpxchg), while holding all - * the locks. It will most likely not succeed. - */ - newval = task_pid_vnr(current); - - curval = cmpxchg_futex_value_locked(uaddr, 0, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - - /* - * Detect deadlocks. In case of REQUEUE_PI this is a valid - * situation and we return success to user space. - */ - if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { - ret = -EDEADLK; - goto out_unlock_put_key; - } - - /* - * Surprise - we got the lock. Just return to userspace: - */ - if (unlikely(!curval)) - goto out_unlock_put_key; - - uval = curval; - - /* - * Set the WAITERS flag, so the owner will know it has someone - * to wake at next unlock - */ - newval = curval | FUTEX_WAITERS; - - /* - * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED. We take over the futex in this - * case. We also do an unconditional take over, when the owner - * of the futex died. - * - * This is safe as we are protected by the hash bucket lock ! - */ - if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED bit */ - newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current); - ownerdied = 0; - lock_taken = 1; - } - - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - if (unlikely(curval != uval)) - goto retry_locked; - - /* - * We took the lock due to owner died take over. - */ - if (unlikely(lock_taken)) - goto out_unlock_put_key; - - /* - * We dont have the lock. Look up the PI state (or create it if - * we are the first waiter): - */ - ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); - + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { switch (ret) { - + case 1: + /* We got the lock. */ + ret = 0; + goto out_unlock_put_key; + case -EFAULT: + goto uaddr_faulted; case -EAGAIN: /* * Task is exiting and we just wait for the @@ -1421,25 +1851,6 @@ retry_locked: put_futex_key(fshared, &q.key); cond_resched(); goto retry; - - case -ESRCH: - /* - * No owner found for this futex. Check if the - * OWNER_DIED bit is set to figure out whether - * this is a robust futex or not. - */ - if (get_futex_value_locked(&curval, uaddr)) - goto uaddr_faulted; - - /* - * We simply start over in case of a robust - * futex. The code above will take the futex - * and return happy. - */ - if (curval & FUTEX_OWNER_DIED) { - ownerdied = 1; - goto retry_locked; - } default: goto out_unlock_put_key; } @@ -1463,71 +1874,21 @@ retry_locked: } spin_lock(q.lock_ptr); - - if (!ret) { - /* - * Got the lock. We might not be the anticipated owner - * if we did a lock-steal - fix up the PI-state in - * that case: - */ - if (q.pi_state->owner != curr) - ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); - } else { - /* - * Catch the rare case, where the lock was released - * when we were on the way back before we locked the - * hash bucket. - */ - if (q.pi_state->owner == curr) { - /* - * Try to get the rt_mutex now. This might - * fail as some other task acquired the - * rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ - if (rt_mutex_trylock(&q.pi_state->pi_mutex)) - ret = 0; - else { - /* - * pi_state is incorrect, some other - * task did a lock steal and we - * returned due to timeout or signal - * without taking the rt_mutex. Too - * late. We can access the - * rt_mutex_owner without locking, as - * the other task is now blocked on - * the hash bucket lock. Fix the state - * up. - */ - struct task_struct *owner; - int res; - - owner = rt_mutex_owner(&q.pi_state->pi_mutex); - res = fixup_pi_state_owner(uaddr, &q, owner, - fshared); - - /* propagate -EFAULT, if the fixup failed */ - if (res) - ret = res; - } - } else { - /* - * Paranoia check. If we did not take the lock - * in the trylock above, then we should not be - * the owner of the rtmutex, neither the real - * nor the pending one: - */ - if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr) - printk(KERN_ERR "futex_lock_pi: ret = %d " - "pi-mutex: %p pi-state %p\n", ret, - q.pi_state->pi_mutex.owner, - q.pi_state->owner); - } - } + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it acquired + * the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock it and return the fault to userspace. + * If fixup_owner() faulted and was unable to handle the fault, unlock + * it and return the fault to userspace. */ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) rt_mutex_unlock(&q.pi_state->pi_mutex); @@ -1535,9 +1896,7 @@ retry_locked: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - if (to) - destroy_hrtimer_on_stack(&to->timer); - return ret != -EINTR ? ret : -ERESTARTNOINTR; + goto out; out_unlock_put_key: queue_unlock(&q, hb); @@ -1547,7 +1906,7 @@ out_put_key: out: if (to) destroy_hrtimer_on_stack(&to->timer); - return ret; + return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: /* @@ -1570,7 +1929,6 @@ uaddr_faulted: goto retry; } - /* * Userspace attempted a TID -> 0 atomic transition, and failed. * This is the in-kernel slowpath: we look up the PI state (if any), @@ -1594,7 +1952,7 @@ retry: if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) return -EPERM; - ret = get_futex_key(uaddr, fshared, &key); + ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; @@ -1672,6 +2030,229 @@ pi_faulted: return ret; } +/** + * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex + * @hb: the hash_bucket futex_q was original enqueued on + * @q: the futex_q woken while waiting to be requeued + * @key2: the futex_key of the requeue target futex + * @timeout: the timeout associated with the wait (NULL if none) + * + * Detect if the task was woken on the initial futex as opposed to the requeue + * target futex. If so, determine if it was a timeout or a signal that caused + * the wakeup and return the appropriate error code to the caller. Must be + * called with the hb lock held. + * + * Returns + * 0 - no early wakeup detected + * <0 - -ETIMEDOUT or -ERESTARTNOINTR + */ +static inline +int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + struct futex_q *q, union futex_key *key2, + struct hrtimer_sleeper *timeout) +{ + int ret = 0; + + /* + * With the hb lock held, we avoid races while we process the wakeup. + * We only need to hold hb (and not hb2) to ensure atomicity as the + * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. + * It can't be requeued from uaddr2 to something else since we don't + * support a PI aware source futex for requeue. + */ + if (!match_futex(&q->key, key2)) { + WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); + /* + * We were woken prior to requeue by a timeout or a signal. + * Unqueue the futex_q and determine which it was. + */ + plist_del(&q->list, &q->list.plist); + drop_futex_key_refs(&q->key); + + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + else + ret = -ERESTARTNOINTR; + } + return ret; +} + +/** + * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 + * @uaddr: the futex we initialyl wait on (non-pi) + * @fshared: whether the futexes are shared (1) or not (0). They must be + * the same type, no requeueing from private to shared, etc. + * @val: the expected value of uaddr + * @abs_time: absolute timeout + * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. + * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) + * @uaddr2: the pi futex we will take prior to returning to user-space + * + * The caller will wait on uaddr and will be requeued by futex_requeue() to + * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and + * complete the acquisition of the rt_mutex prior to returning to userspace. + * This ensures the rt_mutex maintains an owner when it has waiters; without + * one, the pi logic wouldn't know which task to boost/deboost, if there was a + * need to. + * + * We call schedule in futex_wait_queue_me() when we enqueue and return there + * via the following: + * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() + * 2) wakeup on uaddr2 after a requeue and subsequent unlock + * 3) signal (before or after requeue) + * 4) timeout (before or after requeue) + * + * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. + * + * If 2, we may then block on trying to take the rt_mutex and return via: + * 5) successful lock + * 6) signal + * 7) timeout + * 8) other lock acquisition failure + * + * If 6, we setup a restart_block with futex_lock_pi() as the function. + * + * If 4 or 7, we cleanup and return with -ETIMEDOUT. + * + * Returns: + * 0 - On success + * <0 - On error + */ +static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, + int clockrt, u32 __user *uaddr2) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct rt_mutex_waiter rt_waiter; + struct rt_mutex *pi_mutex = NULL; + struct futex_hash_bucket *hb; + union futex_key key2; + struct futex_q q; + int res, ret; + + if (!bitset) + return -EINVAL; + + if (abs_time) { + to = &timeout; + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + + /* + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ + debug_rt_mutex_init_waiter(&rt_waiter); + rt_waiter.task = NULL; + + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = &rt_waiter; + + key2 = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); + if (unlikely(ret != 0)) + goto out; + + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out_key2; + + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ + futex_wait_queue_me(hb, &q, to); + + spin_lock(&hb->lock); + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); + if (ret) + goto out_put_keys; + + /* + * In order for us to be here, we know our q.key == key2, and since + * we took the hb->lock above, we also know that futex_requeue() has + * completed and we no longer have to concern ourselves with a wakeup + * race with the atomic proxy lock acquition by the requeue code. + */ + + /* Check if the requeue code acquired the second futex for us. */ + if (!q.rt_waiter) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case. + */ + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current, + fshared); + spin_unlock(q.lock_ptr); + } + } else { + /* + * We have been woken up by futex_unlock_pi(), a timeout, or a + * signal. futex_unlock_pi() will not destroy the lock_ptr nor + * the pi_state. + */ + WARN_ON(!&q.pi_state); + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + debug_rt_mutex_free_waiter(&rt_waiter); + + spin_lock(q.lock_ptr); + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr2, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it + * acquired the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; + + /* Unqueue and drop the lock. */ + unqueue_me_pi(&q); + } + + /* + * If fixup_pi_state_owner() faulted and was unable to handle the + * fault, unlock the rt_mutex and return the fault to userspace. + */ + if (ret == -EFAULT) { + if (rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + } else if (ret == -EINTR) { + /* + * We've already been requeued, but we have no way to + * restart by calling futex_lock_pi() directly. We + * could restart the syscall, but that will look at + * the user space value and return right away. So we + * drop back with EWOULDBLOCK to tell user space that + * "val" has been changed. That's the same what the + * restart of the syscall would do in + * futex_wait_setup(). + */ + ret = -EWOULDBLOCK; + } + +out_put_keys: + put_futex_key(fshared, &q.key); +out_key2: + put_futex_key(fshared, &key2); + +out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + return ret; +} + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -1894,7 +2475,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, fshared = 1; clockrt = op & FUTEX_CLOCK_REALTIME; - if (clockrt && cmd != FUTEX_WAIT_BITSET) + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; switch (cmd) { @@ -1909,10 +2490,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake(uaddr, fshared, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); break; case FUTEX_WAKE_OP: ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); @@ -1929,6 +2511,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (futex_cmpxchg_enabled) ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; + case FUTEX_WAIT_REQUEUE_PI: + val3 = FUTEX_BITSET_MATCH_ANY; + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); + break; + case FUTEX_CMP_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); + break; default: ret = -ENOSYS; } @@ -1946,7 +2537,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET)) { + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) @@ -1958,11 +2550,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } /* - * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. + * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_WAKE_OP) + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 3394f8f52964..7d047808419d 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o -obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o +obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c687ba4363f2..13c68e71b726 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); @@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) /* Start handling the irq */ if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) { + if (desc->chip->eoi) desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); - } } void @@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) { + if (desc->chip != &no_irq_chip) mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); - } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 5dd2572993cf..a60018402f42 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -11,6 +11,7 @@ */ #include <linux/irq.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> @@ -81,45 +82,48 @@ static struct irq_desc irq_desc_init = { .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), }; -void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) { - int node; void *ptr; - node = cpu_to_node(cpu); - ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node); + if (slab_is_available()) + ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), + GFP_ATOMIC, node); + else + ptr = alloc_bootmem_node(NODE_DATA(node), + nr * sizeof(*desc->kstat_irqs)); /* * don't overwite if can not get new one * init_copy_kstat_irqs() could still use old one */ if (ptr) { - printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", - cpu, node); + printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node); desc->kstat_irqs = ptr; } } -static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP - desc->cpu = cpu; + desc->node = node; #endif lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_kstat_irqs(desc, cpu, nr_cpu_ids); + init_kstat_irqs(desc, node, nr_cpu_ids); if (!desc->kstat_irqs) { printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } - arch_init_chip_data(desc, cpu); + init_desc_masks(desc); + arch_init_chip_data(desc, node); } /* @@ -169,7 +173,8 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); irq_desc_ptrs[i] = desc + i; } @@ -187,11 +192,10 @@ struct irq_desc *irq_to_desc(unsigned int irq) return NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; unsigned long flags; - int node; if (irq >= nr_irqs) { WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", @@ -210,15 +214,17 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) if (desc) goto out_unlock; - node = cpu_to_node(cpu); - desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", - irq, cpu, node); + if (slab_is_available()) + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + else + desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc)); + + printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); if (!desc) { printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - init_one_irq_desc(irq, desc, cpu); + init_one_irq_desc(irq, desc, node); irq_desc_ptrs[irq] = desc; @@ -256,7 +262,8 @@ int __init early_irq_init(void) for (i = 0; i < count; i++) { desc[i].irq = i; - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); desc[i].kstat_irqs = kstat_irqs_all[i]; } return arch_early_irq_init(); @@ -267,7 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) return (irq < NR_IRQS) ? irq_desc + irq : NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) { return irq_to_desc(irq); } @@ -450,11 +457,8 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - /* get new one */ - desc = irq_remap_to_desc(irq, desc); - } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -465,10 +469,8 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); - } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 01ce20eab38f..73468253143b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; -extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); extern void clear_kstat_irqs(struct irq_desc *desc); extern spinlock_t sparse_irq_lock; @@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq, extern int irq_select_affinity_usr(unsigned int irq); +extern void +irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask); + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2734eca59243..aaf5c9d05770 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq) return 1; } -static void +void irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) { struct irqaction *action = desc->action; @@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) spin_lock_irqsave(&desc->lock, flags); #ifdef CONFIG_GENERIC_PENDING_IRQ - if (desc->status & IRQ_MOVE_PCNTXT) - desc->chip->set_affinity(irq, cpumask); + if (desc->status & IRQ_MOVE_PCNTXT) { + if (!desc->chip->set_affinity(irq, cpumask)) { + cpumask_copy(desc->affinity, cpumask); + irq_set_thread_affinity(desc, cpumask); + } + } else { desc->status |= IRQ_MOVE_PENDING; cpumask_copy(desc->pending_mask, cpumask); } #else - cpumask_copy(desc->affinity, cpumask); - desc->chip->set_affinity(irq, cpumask); + if (!desc->chip->set_affinity(irq, cpumask)) { + cpumask_copy(desc->affinity, cpumask); + irq_set_thread_affinity(desc, cpumask); + } #endif - irq_set_thread_affinity(desc, cpumask); desc->status |= IRQ_AFFINITY_SET; spin_unlock_irqrestore(&desc->lock, flags); return 0; diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index e05ad9be43b7..cfe767ca1545 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -1,5 +1,8 @@ #include <linux/irq.h> +#include <linux/interrupt.h> + +#include "internals.h" void move_masked_irq(int irq) { @@ -39,11 +42,12 @@ void move_masked_irq(int irq) * masking the irqs. */ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - cpumask_and(desc->affinity, - desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, desc->affinity); - } + < nr_cpu_ids)) + if (!desc->chip->set_affinity(irq, desc->pending_mask)) { + cpumask_copy(desc->affinity, desc->pending_mask); + irq_set_thread_affinity(desc, desc->pending_mask); + } + cpumask_clear(desc->pending_mask); } diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 44bbdcbaf8d2..2f69bee57bf2 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -15,9 +15,9 @@ static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, - int cpu, int nr) + int node, int nr) { - init_kstat_irqs(desc, cpu, nr); + init_kstat_irqs(desc, node, nr); if (desc->kstat_irqs != old_desc->kstat_irqs) memcpy(desc->kstat_irqs, old_desc->kstat_irqs, @@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) } static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { memcpy(desc, old_desc, sizeof(struct irq_desc)); - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " "for migration.\n", irq); return false; } spin_lock_init(&desc->lock); - desc->cpu = cpu; + desc->node = node; lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); - arch_init_copy_chip_data(old_desc, desc, cpu); + arch_init_copy_chip_data(old_desc, desc, node); return true; } @@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) } static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, - int cpu) + int node) { struct irq_desc *desc; unsigned int irq; unsigned long flags; - int node; irq = old_desc->irq; @@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, if (desc && old_desc != desc) goto out_unlock; - node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { printk(KERN_ERR "irq %d: can not get new irq_desc " @@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { + if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) { /* still use old one */ kfree(desc); desc = old_desc; @@ -97,9 +95,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, /* free the old one */ free_one_irq_desc(old_desc, desc); - spin_unlock(&old_desc->lock); kfree(old_desc); - spin_lock(&desc->lock); return desc; @@ -109,24 +105,14 @@ out_unlock: return desc; } -struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) { - int old_cpu; - int node, old_node; - /* those all static, do move them */ if (desc->irq < NR_IRQS_LEGACY) return desc; - old_cpu = desc->cpu; - if (old_cpu != cpu) { - node = cpu_to_node(cpu); - old_node = cpu_to_node(old_cpu); - if (old_node != node) - desc = __real_move_irq_desc(desc, cpu); - else - desc->cpu = cpu; - } + if (desc->node != node) + desc = __real_move_irq_desc(desc, node); return desc; } diff --git a/kernel/kexec.c b/kernel/kexec.c index 5a758c6e4950..e4983770913b 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1451,7 +1451,6 @@ int kernel_kexec(void) error = device_suspend(PMSG_FREEZE); if (error) goto Resume_console; - device_pm_lock(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* * device_power_down() now. Otherwise, drivers for @@ -1489,7 +1488,6 @@ int kernel_kexec(void) enable_nonboot_cpus(); device_power_up(PMSG_RESTORE); Resume_devices: - device_pm_unlock(); device_resume(PMSG_RESTORE); Resume_console: resume_console(); diff --git a/kernel/kmod.c b/kernel/kmod.c index b750675251e5..7e95bedb2bfc 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->argv = argv; sub_info->envp = envp; sub_info->cred = prepare_usermodehelper_creds(); - if (!sub_info->cred) + if (!sub_info->cred) { + kfree(sub_info); return NULL; + } out: return sub_info; diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index a2cc7e9a6e84..699a2ac3a0d7 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -54,9 +54,9 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ -#define MAX_LOCKDEP_ENTRIES 8192UL +#define MAX_LOCKDEP_ENTRIES 16384UL -#define MAX_LOCKDEP_CHAINS_BITS 14 +#define MAX_LOCKDEP_CHAINS_BITS 15 #define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) #define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) diff --git a/kernel/mutex.c b/kernel/mutex.c index 507cf2b5e9f1..e5cc0cd28d54 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -249,7 +249,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* didnt get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); - __schedule(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); spin_lock_mutex(&lock->wait_lock, flags); } @@ -471,5 +473,28 @@ int __sched mutex_trylock(struct mutex *lock) return ret; } - EXPORT_SYMBOL(mutex_trylock); + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/panic.c b/kernel/panic.c index 874ecf1307ae..984b3ecbd72c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -340,39 +340,44 @@ void oops_exit(void) } #ifdef WANT_WARN_ON_SLOWPATH -void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) -{ +struct slowpath_args { + const char *fmt; va_list args; - char function[KSYM_SYMBOL_LEN]; - unsigned long caller = (unsigned long)__builtin_return_address(0); - const char *board; +}; - sprint_symbol(function, caller); +static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args) +{ + const char *board; printk(KERN_WARNING "------------[ cut here ]------------\n"); - printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, - line, function); + printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller); board = dmi_get_system_info(DMI_PRODUCT_NAME); if (board) printk(KERN_WARNING "Hardware name: %s\n", board); - if (*fmt) { - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - } + if (args) + vprintk(args->fmt, args->args); print_modules(); dump_stack(); print_oops_end_marker(); add_taint(TAINT_WARN); } + +void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...) +{ + struct slowpath_args args; + + args.fmt = fmt; + va_start(args.args, fmt); + warn_slowpath_common(file, line, __builtin_return_address(0), &args); + va_end(args.args); +} EXPORT_SYMBOL(warn_slowpath_fmt); void warn_slowpath_null(const char *file, int line) { - static const char *empty = ""; - warn_slowpath_fmt(file, line, empty); + warn_slowpath_common(file, line, __builtin_return_address(0), NULL); } EXPORT_SYMBOL(warn_slowpath_null); #endif diff --git a/kernel/power/disk.c b/kernel/power/disk.c index e71ca9cd81b2..5cb080e7eebd 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -215,8 +215,6 @@ static int create_image(int platform_mode) if (error) return error; - device_pm_lock(); - /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) @@ -227,7 +225,7 @@ static int create_image(int platform_mode) if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Unlock; + return error; } error = platform_pre_snapshot(platform_mode); @@ -241,9 +239,9 @@ static int create_image(int platform_mode) local_irq_disable(); - sysdev_suspend(PMSG_FREEZE); + error = sysdev_suspend(PMSG_FREEZE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " + printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); goto Enable_irqs; } @@ -280,9 +278,6 @@ static int create_image(int platform_mode) device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); - Unlock: - device_pm_unlock(); - return error; } @@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode) { int error; - device_pm_lock(); - error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); - goto Unlock; + return error; } error = platform_pre_restore(platform_mode); @@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode) device_power_up(PMSG_RECOVER); - Unlock: - device_pm_unlock(); - return error; } @@ -464,11 +454,9 @@ int hibernation_platform_enter(void) goto Resume_devices; } - device_pm_lock(); - error = device_power_down(PMSG_HIBERNATE); if (error) - goto Unlock; + goto Resume_devices; error = hibernation_ops->prepare(); if (error) @@ -493,9 +481,6 @@ int hibernation_platform_enter(void) device_power_up(PMSG_RESTORE); - Unlock: - device_pm_unlock(); - Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); diff --git a/kernel/power/main.c b/kernel/power/main.c index f99ed6a75eac..868028280d13 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state) { int error; - device_pm_lock(); - if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - goto Done; + return error; } error = device_power_down(PMSG_SUSPEND); @@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->finish) suspend_ops->finish(); - Done: - device_pm_unlock(); - return error; } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e950805f8630..2442d140bd9a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -294,6 +294,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) if (child->ptrace) { child->exit_code = data; dead = __ptrace_detach(current, child); + if (!child->exit_state) + wake_up_process(child); } write_unlock_irq(&tasklist_lock); diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index ce97a4df64d3..beb0e659adcc 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg) rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - ret = 0; + ret = 0; /* unused */ __wait_event_interruptible(rcu_ctrlblk.sched_wq, rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, ret); - /* - * Signals would prevent us from sleeping, and we cannot - * do much with them in any case. So flush them. - */ - if (ret) - flush_signals(current); couldsleepnext = 0; } while (!kthread_should_stop()); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d2a372fb0b9b..0dccfbba6d26 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) + if (rdp->qs_pending) { + rdp->n_rp_qs_pending++; return 1; + } /* Does this CPU have callbacks ready to invoke? */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (cpu_has_callbacks_ready_to_invoke(rdp)) { + rdp->n_rp_cb_ready++; return 1; + } /* Has RCU gone idle with this CPU needing another grace period? */ - if (cpu_needs_another_gp(rsp, rdp)) + if (cpu_needs_another_gp(rsp, rdp)) { + rdp->n_rp_cpu_needs_gp++; return 1; + } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + rdp->n_rp_gp_completed++; return 1; + } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + rdp->n_rp_gp_started++; return 1; + } /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + rdp->n_rp_need_fqs++; return 1; + } /* nothing to do */ + rdp->n_rp_need_nothing++; return 0; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b1875ba9404..fe1dcdbf1ca3 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = { .release = single_release, }; -static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) +{ + seq_printf(m, "%3d%cnp=%ld " + "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->n_rcu_pending, + rdp->n_rp_qs_pending, + rdp->n_rp_cb_ready, + rdp->n_rp_cpu_needs_gp, + rdp->n_rp_gp_completed, + rdp->n_rp_gp_started, + rdp->n_rp_need_fqs, + rdp->n_rp_need_nothing); +} + +static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +{ + int cpu; + struct rcu_data *rdp; + + for_each_possible_cpu(cpu) { + rdp = rsp->rda[cpu]; + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } +} + +static int show_rcu_pending(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_rcu_pendings(m, &rcu_bh_state); + return 0; +} + +static int rcu_pending_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcu_pending, NULL); +} + +static struct file_operations rcu_pending_fops = { + .owner = THIS_MODULE, + .open = rcu_pending_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; +static struct dentry *datadir; +static struct dentry *datadir_csv; +static struct dentry *gpdir; +static struct dentry *hierdir; +static struct dentry *rcu_pendingdir; + static int __init rcuclassic_trace_init(void) { rcudir = debugfs_create_dir("rcu", NULL); @@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void) NULL, &rcuhier_fops); if (!hierdir) goto free_out; + + rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir, + NULL, &rcu_pending_fops); + if (!rcu_pendingdir) + goto free_out; return 0; free_out: if (datadir) @@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void) debugfs_remove(datadir_csv); debugfs_remove(gpdir); debugfs_remove(hierdir); + debugfs_remove(rcu_pendingdir); debugfs_remove(rcudir); } diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb921ffa..820c5af44f3e 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * assigned pending owner [which might not have taken the * lock yet]: */ -static inline int try_to_steal_lock(struct rt_mutex *lock) +static inline int try_to_steal_lock(struct rt_mutex *lock, + struct task_struct *task) { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; @@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) if (!rt_mutex_owner_pending(lock)) return 0; - if (pendowner == current) + if (pendowner == task) return 1; spin_lock_irqsave(&pendowner->pi_lock, flags); - if (current->prio >= pendowner->prio) { + if (task->prio >= pendowner->prio) { spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 0; } @@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) * We are going to steal the lock and a waiter was * enqueued on the pending owners pi_waiters queue. So * we have to enqueue this waiter into - * current->pi_waiters list. This covers the case, - * where current is boosted because it holds another + * task->pi_waiters list. This covers the case, + * where task is boosted because it holds another * lock and gets unboosted because the booster is * interrupted, so we would delay a waiter with higher - * priority as current->normal_prio. + * priority as task->normal_prio. * * Note: in the rare case of a SCHED_OTHER task changing * its priority and thus stealing the lock, next->task - * might be current: + * might be task: */ - if (likely(next->task != current)) { - spin_lock_irqsave(¤t->pi_lock, flags); - plist_add(&next->pi_list_entry, ¤t->pi_waiters); - __rt_mutex_adjust_prio(current); - spin_unlock_irqrestore(¤t->pi_lock, flags); + if (likely(next->task != task)) { + spin_lock_irqsave(&task->pi_lock, flags); + plist_add(&next->pi_list_entry, &task->pi_waiters); + __rt_mutex_adjust_prio(task); + spin_unlock_irqrestore(&task->pi_lock, flags); } return 1; } @@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ mark_rt_mutex_waiters(lock); - if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) + if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) return 0; /* We got the lock. */ @@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) { struct task_struct *owner = rt_mutex_owner(lock); @@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, unsigned long flags; int chain_walk = 0, res; - spin_lock_irqsave(¤t->pi_lock, flags); - __rt_mutex_adjust_prio(current); - waiter->task = current; + spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); + waiter->task = task; waiter->lock = lock; - plist_node_init(&waiter->list_entry, current->prio); - plist_node_init(&waiter->pi_list_entry, current->prio); + plist_node_init(&waiter->list_entry, task->prio); + plist_node_init(&waiter->pi_list_entry, task->prio); /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) top_waiter = rt_mutex_top_waiter(lock); plist_add(&waiter->list_entry, &lock->wait_list); - current->pi_blocked_on = waiter; + task->pi_blocked_on = waiter; - spin_unlock_irqrestore(¤t->pi_lock, flags); + spin_unlock_irqrestore(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { spin_lock_irqsave(&owner->pi_lock, flags); @@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - current); + task); spin_lock(&lock->wait_lock); @@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task) rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } -/* - * Slow path lock function: +/** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE + * or TASK_UNINTERRUPTIBLE) + * @timeout: the pre-initialized and started timer, or NULL for none + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: passed to task_blocks_on_rt_mutex + * + * lock->wait_lock must be held by the caller. */ static int __sched -rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock) +__rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + struct rt_mutex_waiter *waiter, + int detect_deadlock) { - struct rt_mutex_waiter waiter; int ret = 0; - debug_rt_mutex_init_waiter(&waiter); - waiter.task = NULL; - - spin_lock(&lock->wait_lock); - - /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock)) { - spin_unlock(&lock->wait_lock); - return 0; - } - - set_current_state(state); - - /* Setup the timer, when timeout != NULL */ - if (unlikely(timeout)) { - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&timeout->timer)) - timeout->task = NULL; - } - for (;;) { /* Try to acquire the lock: */ if (try_to_take_rt_mutex(lock)) @@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, } /* - * waiter.task is NULL the first time we come here and + * waiter->task is NULL the first time we come here and * when we have been woken up by the previous owner * but the lock got stolen by a higher prio task. */ - if (!waiter.task) { - ret = task_blocks_on_rt_mutex(lock, &waiter, + if (!waiter->task) { + ret = task_blocks_on_rt_mutex(lock, waiter, current, detect_deadlock); /* * If we got woken up by the owner then start loop * all over without going into schedule to try * to get the lock now: */ - if (unlikely(!waiter.task)) { + if (unlikely(!waiter->task)) { /* * Reset the return value. We might * have returned with -EDEADLK and the @@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, spin_unlock(&lock->wait_lock); - debug_rt_mutex_print_deadlock(&waiter); + debug_rt_mutex_print_deadlock(waiter); - if (waiter.task) + if (waiter->task) schedule_rt_mutex(lock); spin_lock(&lock->wait_lock); set_current_state(state); } + return ret; +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock) +{ + struct rt_mutex_waiter waiter; + int ret = 0; + + debug_rt_mutex_init_waiter(&waiter); + waiter.task = NULL; + + spin_lock(&lock->wait_lock); + + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock)) { + spin_unlock(&lock->wait_lock); + return 0; + } + + set_current_state(state); + + /* Setup the timer, when timeout != NULL */ + if (unlikely(timeout)) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, + detect_deadlock); + set_current_state(TASK_RUNNING); if (unlikely(waiter.task)) @@ -864,9 +891,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); /** - * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible - * the timeout structure is provided - * by the caller + * rt_mutex_timed_lock - lock a rt_mutex interruptible + * the timeout structure is provided + * by the caller * * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) @@ -913,7 +940,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_unlock); -/*** +/** * rt_mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed * @@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, } /** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + mark_rt_mutex_waiters(lock); + + if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { + /* We got the lock for task. */ + debug_rt_mutex_lock(lock); + + rt_mutex_set_owner(lock, task, 0); + + rt_mutex_deadlock_account_lock(lock, task); + return 1; + } + + ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + + + if (ret && !waiter->task) { + /* + * Reset the return value. We might have + * returned with -EDEADLK and the owner + * released the lock while we were walking the + * pi chain. Let the waiter sort it out. + */ + ret = 0; + } + spin_unlock(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + return ret; +} + +/** * rt_mutex_next_owner - return the next owner of the lock * * @lock: the rt lock query @@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) return rt_mutex_top_waiter(lock)->task; } + +/** + * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * @lock: the rt_mutex we were woken on + * @to: the timeout, null if none. hrtimer should already have + * been started. + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Complete the lock acquisition started our behalf by another thread. + * + * Returns: + * 0 - success + * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * + * Special API call for PI-futex requeue support + */ +int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + set_current_state(TASK_INTERRUPTIBLE); + + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, + detect_deadlock); + + set_current_state(TASK_RUNNING); + + if (unlikely(waiter->task)) + remove_waiter(lock, waiter); + + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + + spin_unlock(&lock->wait_lock); + + /* + * Readjust priority, when we did not get the lock. We might have been + * the pending owner and boosted. Since we did not take the lock, the + * PI boost has to go. + */ + if (unlikely(ret)) + rt_mutex_adjust_prio(current); + + return ret; +} diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index e124bf5800ea..97a2f81866af 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); +extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, + int detect_deadlock); +extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" diff --git a/kernel/sched.c b/kernel/sched.c index 6530a27052f3..14c447ae5d53 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -626,6 +626,10 @@ struct rq { struct list_head migration_queue; #endif + /* calc_load related fields */ + unsigned long calc_load_update; + long calc_load_active; + #ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SMP int hrtick_csd_pending; @@ -1724,6 +1728,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) } #endif +static void calc_load_account_active(struct rq *this_rq); + #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" @@ -2497,6 +2503,17 @@ out: return success; } +/** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. + * + * Attempt to wake up the nominated process and move it to the set of runnable + * processes. Returns 1 if the process was woken up, 0 if it was already + * running. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. + */ int wake_up_process(struct task_struct *p) { return try_to_wake_up(p, TASK_ALL, 0); @@ -2805,7 +2822,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_enter_lazy_cpu_mode(); + arch_start_context_switch(prev); if (unlikely(!mm)) { next->active_mm = oldmm; @@ -2895,19 +2912,72 @@ unsigned long nr_iowait(void) return sum; } -unsigned long nr_active(void) +/* Variables and functions for calc_load */ +static atomic_long_t calc_load_tasks; +static unsigned long calc_load_update; +unsigned long avenrun[3]; +EXPORT_SYMBOL(avenrun); + +/** + * get_avenrun - get the load average array + * @loads: pointer to dest load array + * @offset: offset to add + * @shift: shift count to shift the result left + * + * These values are estimates at best, so no need for locking. + */ +void get_avenrun(unsigned long *loads, unsigned long offset, int shift) { - unsigned long i, running = 0, uninterruptible = 0; + loads[0] = (avenrun[0] + offset) << shift; + loads[1] = (avenrun[1] + offset) << shift; + loads[2] = (avenrun[2] + offset) << shift; +} - for_each_online_cpu(i) { - running += cpu_rq(i)->nr_running; - uninterruptible += cpu_rq(i)->nr_uninterruptible; - } +static unsigned long +calc_load(unsigned long load, unsigned long exp, unsigned long active) +{ + load *= exp; + load += active * (FIXED_1 - exp); + return load >> FSHIFT; +} - if (unlikely((long)uninterruptible < 0)) - uninterruptible = 0; +/* + * calc_load - update the avenrun load estimates 10 ticks after the + * CPUs have updated calc_load_tasks. + */ +void calc_global_load(void) +{ + unsigned long upd = calc_load_update + 10; + long active; + + if (time_before(jiffies, upd)) + return; + + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - return running + uninterruptible; + avenrun[0] = calc_load(avenrun[0], EXP_1, active); + avenrun[1] = calc_load(avenrun[1], EXP_5, active); + avenrun[2] = calc_load(avenrun[2], EXP_15, active); + + calc_load_update += LOAD_FREQ; +} + +/* + * Either called from update_cpu_load() or from a cpu going idle + */ +static void calc_load_account_active(struct rq *this_rq) +{ + long nr_active, delta; + + nr_active = this_rq->nr_running; + nr_active += (long) this_rq->nr_uninterruptible; + + if (nr_active != this_rq->calc_load_active) { + delta = nr_active - this_rq->calc_load_active; + this_rq->calc_load_active = nr_active; + atomic_long_add(delta, &calc_load_tasks); + } } /* @@ -2938,6 +3008,11 @@ static void update_cpu_load(struct rq *this_rq) new_load += scale-1; this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; } + + if (time_after_eq(jiffies, this_rq->calc_load_update)) { + this_rq->calc_load_update += LOAD_FREQ; + calc_load_account_active(this_rq); + } } #ifdef CONFIG_SMP @@ -4279,10 +4354,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) static struct { atomic_t load_balancer; cpumask_var_t cpu_mask; + cpumask_var_t ilb_grp_nohz_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), }; +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +/** + * lowest_flag_domain - Return lowest sched_domain containing flag. + * @cpu: The cpu whose lowest level of sched domain is to + * be returned. + * @flag: The flag to check for the lowest sched_domain + * for the given cpu. + * + * Returns the lowest sched_domain of a cpu which contains the given flag. + */ +static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) +{ + struct sched_domain *sd; + + for_each_domain(cpu, sd) + if (sd && (sd->flags & flag)) + break; + + return sd; +} + +/** + * for_each_flag_domain - Iterates over sched_domains containing the flag. + * @cpu: The cpu whose domains we're iterating over. + * @sd: variable holding the value of the power_savings_sd + * for cpu. + * @flag: The flag to filter the sched_domains to be iterated. + * + * Iterates over all the scheduler domains for a given cpu that has the 'flag' + * set, starting from the lowest sched_domain to the highest. + */ +#define for_each_flag_domain(cpu, sd, flag) \ + for (sd = lowest_flag_domain(cpu, flag); \ + (sd && (sd->flags & flag)); sd = sd->parent) + +/** + * is_semi_idle_group - Checks if the given sched_group is semi-idle. + * @ilb_group: group to be checked for semi-idleness + * + * Returns: 1 if the group is semi-idle. 0 otherwise. + * + * We define a sched_group to be semi idle if it has atleast one idle-CPU + * and atleast one non-idle CPU. This helper function checks if the given + * sched_group is semi-idle or not. + */ +static inline int is_semi_idle_group(struct sched_group *ilb_group) +{ + cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask, + sched_group_cpus(ilb_group)); + + /* + * A sched_group is semi-idle when it has atleast one busy cpu + * and atleast one idle cpu. + */ + if (cpumask_empty(nohz.ilb_grp_nohz_mask)) + return 0; + + if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group))) + return 0; + + return 1; +} +/** + * find_new_ilb - Finds the optimum idle load balancer for nomination. + * @cpu: The cpu which is nominating a new idle_load_balancer. + * + * Returns: Returns the id of the idle load balancer if it exists, + * Else, returns >= nr_cpu_ids. + * + * This algorithm picks the idle load balancer such that it belongs to a + * semi-idle powersavings sched_domain. The idea is to try and avoid + * completely idle packages/cores just for the purpose of idle load balancing + * when there are other idle cpu's which are better suited for that job. + */ +static int find_new_ilb(int cpu) +{ + struct sched_domain *sd; + struct sched_group *ilb_group; + + /* + * Have idle load balancer selection from semi-idle packages only + * when power-aware load balancing is enabled + */ + if (!(sched_smt_power_savings || sched_mc_power_savings)) + goto out_done; + + /* + * Optimize for the case when we have no idle CPUs or only one + * idle CPU. Don't walk the sched_domain hierarchy in such cases + */ + if (cpumask_weight(nohz.cpu_mask) < 2) + goto out_done; + + for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { + ilb_group = sd->groups; + + do { + if (is_semi_idle_group(ilb_group)) + return cpumask_first(nohz.ilb_grp_nohz_mask); + + ilb_group = ilb_group->next; + + } while (ilb_group != sd->groups); + } + +out_done: + return cpumask_first(nohz.cpu_mask); +} +#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ +static inline int find_new_ilb(int call_cpu) +{ + return cpumask_first(nohz.cpu_mask); +} +#endif + /* * This routine will try to nominate the ilb (idle load balancing) * owner among the cpus whose ticks are stopped. ilb owner will do the idle @@ -4337,8 +4528,24 @@ int select_nohz_load_balancer(int stop_tick) /* make me the ilb owner */ if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) return 1; - } else if (atomic_read(&nohz.load_balancer) == cpu) + } else if (atomic_read(&nohz.load_balancer) == cpu) { + int new_ilb; + + if (!(sched_smt_power_savings || + sched_mc_power_savings)) + return 1; + /* + * Check to see if there is a more power-efficient + * ilb. + */ + new_ilb = find_new_ilb(cpu); + if (new_ilb < nr_cpu_ids && new_ilb != cpu) { + atomic_set(&nohz.load_balancer, -1); + resched_cpu(new_ilb); + return 0; + } return 1; + } } else { if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) return 0; @@ -4507,15 +4714,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) } if (atomic_read(&nohz.load_balancer) == -1) { - /* - * simple selection for now: Nominate the - * first cpu in the nohz list to be the next - * ilb owner. - * - * TBD: Traverse the sched domains and nominate - * the nearest cpu in the nohz.cpu_mask. - */ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = find_new_ilb(cpu); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -5046,13 +5245,15 @@ pick_next_task(struct rq *rq) /* * schedule() is the main scheduler function. */ -asmlinkage void __sched __schedule(void) +asmlinkage void __sched schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; int cpu; +need_resched: + preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_qsctr_inc(cpu); @@ -5109,15 +5310,9 @@ need_resched_nonpreemptible: if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; -} -asmlinkage void __sched schedule(void) -{ -need_resched: - preempt_disable(); - __schedule(); preempt_enable_no_resched(); - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) + if (need_resched()) goto need_resched; } EXPORT_SYMBOL(schedule); @@ -5260,7 +5455,7 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, +static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; @@ -5280,6 +5475,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode, * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: is directly passed to the wakeup function + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) @@ -5318,6 +5516,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) * with each other. This can prevent needless bouncing between CPUs. * * On UP it can prevent extra preemption. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) @@ -5354,6 +5555,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ * awakened in the same order in which they were queued. * * See also complete_all(), wait_for_completion() and related routines. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void complete(struct completion *x) { @@ -5371,6 +5575,9 @@ EXPORT_SYMBOL(complete); * @x: holds the state of this particular completion * * This will wake up all threads waiting on this particular completion event. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void complete_all(struct completion *x) { @@ -6529,8 +6736,9 @@ void sched_show_task(struct task_struct *p) #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif - printk(KERN_CONT "%5lu %5d %6d\n", free, - task_pid_nr(p), task_pid_nr(p->real_parent)); + printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, + task_pid_nr(p), task_pid_nr(p->real_parent), + (unsigned long)task_thread_info(p)->flags); show_stack(p, NULL); } @@ -7009,6 +7217,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu) } } + +/* + * remove the tasks which were accounted by rq from calc_load_tasks. + */ +static void calc_global_load_remove(struct rq *rq) +{ + atomic_long_sub(rq->calc_load_active, &calc_load_tasks); +} #endif /* CONFIG_HOTPLUG_CPU */ #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) @@ -7243,6 +7459,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* Update our root-domain */ rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); + rq->calc_load_update = calc_load_update; + rq->calc_load_active = 0; if (rq->rd) { BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); @@ -7282,7 +7500,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) cpuset_unlock(); migrate_nr_uninterruptible(rq); BUG_ON(rq->nr_running != 0); - + calc_global_load_remove(rq); /* * No need to migrate the tasks: it was best-effort if * they didn't take sched_hotcpu_mutex. Just wake up @@ -7792,8 +8010,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; /* * The cpus mask in sched_group and sched_domain hangs off the end. - * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space - * for nr_cpu_ids < CONFIG_NR_CPUS. + * + * ( See the the comments in include/linux/sched.h:struct sched_group + * and struct sched_domain. ) */ struct static_sched_group { struct sched_group sg; @@ -7914,7 +8133,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) struct sched_domain *sd; sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + if (j != group_first_cpu(sd->groups)) { /* * Only add "power" once for each * physical package. @@ -7992,7 +8211,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != group_first_cpu(sd->groups)) return; child = sd->child; @@ -8977,6 +9196,8 @@ void __init sched_init(void) rq = cpu_rq(i); spin_lock_init(&rq->lock); rq->nr_running = 0; + rq->calc_load_active = 0; + rq->calc_load_update = jiffies + LOAD_FREQ; init_cfs_rq(&rq->cfs, rq); init_rt_rq(&rq->rt, rq); #ifdef CONFIG_FAIR_GROUP_SCHED @@ -9084,6 +9305,9 @@ void __init sched_init(void) * when this runqueue becomes "idle". */ init_idle(current, smp_processor_id()); + + calc_load_update = jiffies + LOAD_FREQ; + /* * During early bootup we pretend to be a normal task: */ @@ -9094,6 +9318,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ alloc_bootmem_cpumask_var(&nohz.cpu_mask); + alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); #endif alloc_bootmem_cpumask_var(&cpu_isolated_map); #endif /* SMP */ @@ -9839,6 +10064,13 @@ static int sched_rt_global_constraints(void) if (sysctl_sched_rt_period <= 0) return -EINVAL; + /* + * There's always some RT tasks in the root group + * -- migration, kstopmachine etc.. + */ + if (sysctl_sched_rt_runtime == 0) + return -EBUSY; + spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); for_each_possible_cpu(i) { struct rt_rq *rt_rq = &cpu_rq(i)->rt; diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 819f17ac796e..e1d16c9a7680 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -38,7 +38,8 @@ */ unsigned long long __attribute__((weak)) sched_clock(void) { - return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ); + return (unsigned long long)(jiffies - INITIAL_JIFFIES) + * (NSEC_PER_SEC / HZ); } static __read_mostly int sched_clock_running; diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index cdd3c89574cd..344712a5e3ed 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) vec->count = 0; if (bootmem) alloc_bootmem_cpumask_var(&vec->mask); - else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) + else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) goto cleanup; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3816f217f119..5f9650e8fe75 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) find_matching_se(&se, &pse); - while (se) { - BUG_ON(!pse); + BUG_ON(!pse); - if (wakeup_preempt_entity(se, pse) == 1) { - resched_task(curr); - break; - } - - se = parent_entity(se); - pse = parent_entity(pse); - } + if (wakeup_preempt_entity(se, pse) == 1) + resched_task(curr); } static struct task_struct *pick_next_task_fair(struct rq *rq) diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 8a21a2e28c13..499672c10cbd 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - + /* adjust the active tasks as we might go into a long sleep */ + calc_load_account_active(rq); return rq->idle; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f2c66f8f9712..9bf0d2a73045 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void) unsigned int i; for_each_possible_cpu(i) - alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), + zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), GFP_KERNEL, cpu_to_node(i)); } #endif /* CONFIG_SMP */ diff --git a/kernel/signal.c b/kernel/signal.c index 94ec0a4dde0f..dba6ae99978a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2276,24 +2276,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) return kill_something_info(sig, &info, pid); } -static int do_tkill(pid_t tgid, pid_t pid, int sig) +static int +do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { - int error; - struct siginfo info; struct task_struct *p; unsigned long flags; - - error = -ESRCH; - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { - error = check_kill_permission(sig, &info, p); + error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. @@ -2303,7 +2296,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * signal is private anyway. */ if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, &info, p); + error = specific_send_sig_info(sig, info, p); unlock_task_sighand(p, &flags); } } @@ -2312,6 +2305,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) return error; } +static int do_tkill(pid_t tgid, pid_t pid, int sig) +{ + struct siginfo info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = task_tgid_vnr(current); + info.si_uid = current_uid(); + + return do_send_specific(tgid, pid, sig, &info); +} + /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread @@ -2361,6 +2367,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, return kill_proc_info(sig, &info, pid); } +long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) +{ + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info->si_code >= 0) + return -EPERM; + info->si_signo = sig; + + return do_send_specific(tgid, pid, sig, info); +} + +SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *t = current; diff --git a/kernel/smp.c b/kernel/smp.c index 858baac568ee..ad63d8501207 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, + if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, cpu_to_node(cpu))) return NOTIFY_BAD; break; diff --git a/kernel/softirq.c b/kernel/softirq.c index dc4d0cfdcb2d..258885a543db 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -827,7 +827,7 @@ int __init __weak arch_early_irq_init(void) return 0; } -int __weak arch_init_chip_data(struct irq_desc *desc, int cpu) +int __weak arch_init_chip_data(struct irq_desc *desc, int node) { return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b2970d56fb76..6a463716ecbf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -731,6 +731,14 @@ static struct ctl_table kern_table[] = { }, { .ctl_name = CTL_UNNUMBERED, + .procname = "bootloader_version", + .data = &bootloader_version, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "kstack_depth_to_print", .data = &kstack_depth_to_print, .maxlen = sizeof(int), diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 687dff49f6e7..52a8bf8931f3 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -22,7 +22,7 @@ /* * This read-write spinlock protects us from races in SMP while - * playing with xtime and avenrun. + * playing with xtime. */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); diff --git a/kernel/timer.c b/kernel/timer.c index cffffad01c31..a26ed294f938 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1123,47 +1123,6 @@ void update_process_times(int user_tick) } /* - * Nr of active tasks - counted in fixed-point numbers - */ -static unsigned long count_active_tasks(void) -{ - return nr_active() * FIXED_1; -} - -/* - * Hmm.. Changed this, as the GNU make sources (load.c) seems to - * imply that avenrun[] is the standard name for this kind of thing. - * Nothing else seems to be standardized: the fractional size etc - * all seem to differ on different machines. - * - * Requires xtime_lock to access. - */ -unsigned long avenrun[3]; - -EXPORT_SYMBOL(avenrun); - -/* - * calc_load - given tick count, update the avenrun load estimates. - * This is called while holding a write_lock on xtime_lock. - */ -static inline void calc_load(unsigned long ticks) -{ - unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - if (unlikely(count < 0)) { - active_tasks = count_active_tasks(); - do { - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); - count += LOAD_FREQ; - } while (count < 0); - } -} - -/* * This function runs timers and the timer-tq in bottom half context. */ static void run_timer_softirq(struct softirq_action *h) @@ -1187,16 +1146,6 @@ void run_local_timers(void) } /* - * Called by the timer interrupt. xtime_lock must already be taken - * by the timer IRQ! - */ -static inline void update_times(unsigned long ticks) -{ - update_wall_time(); - calc_load(ticks); -} - -/* * The 64-bit jiffies value is not atomic - you MUST NOT read it * without sampling the sequence number in xtime_lock. * jiffies is defined in the linker script... @@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks) void do_timer(unsigned long ticks) { jiffies_64 += ticks; - update_times(ticks); + update_wall_time(); + calc_global_load(); } #ifdef __ARCH_WANT_SYS_ALARM @@ -1406,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; - unsigned long seq; + struct timespec tp; memset(info, 0, sizeof(struct sysinfo)); - do { - struct timespec tp; - seq = read_seqbegin(&xtime_lock); - - /* - * This is annoying. The below is the same thing - * posix_get_clock_monotonic() does, but it wants to - * take the lock which we want to cover the loads stuff - * too. - */ - - getnstimeofday(&tp); - tp.tv_sec += wall_to_monotonic.tv_sec; - tp.tv_nsec += wall_to_monotonic.tv_nsec; - monotonic_to_bootbased(&tp); - if (tp.tv_nsec - NSEC_PER_SEC >= 0) { - tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; - tp.tv_sec++; - } - info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); - info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); - info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); - info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); - info->procs = nr_threads; - } while (read_seqretry(&xtime_lock, seq)); + info->procs = nr_threads; si_meminfo(info); si_swapinfo(info); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cae34c69752f..8acd9b81a5d7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2428,7 +2428,7 @@ static const char readme_msg[] = "# echo print-parent > /debug/tracing/trace_options\n" "# echo 1 > /debug/tracing/tracing_enabled\n" "# cat /debug/tracing/trace > /tmp/trace.txt\n" - "echo 0 > /debug/tracing/tracing_enabled\n" + "# echo 0 > /debug/tracing/tracing_enabled\n" ; static ssize_t diff --git a/kernel/wait.c b/kernel/wait.c index 42a2dbc181c8..ea7c3b4275cf 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, if (!list_empty(&wait->task_list)) list_del_init(&wait->task_list); else if (waitqueue_active(q)) - __wake_up_common(q, mode, 1, 0, key); + __wake_up_locked_key(q, mode, key); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(abort_exclusive_wait); diff --git a/lib/cpumask.c b/lib/cpumask.c index 1f71b97de0f9..eb23aaa0c7b8 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -119,6 +119,12 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) } EXPORT_SYMBOL(alloc_cpumask_var_node); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) +{ + return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); +} +EXPORT_SYMBOL(zalloc_cpumask_var_node); + /** * alloc_cpumask_var - allocate a struct cpumask * @mask: pointer to cpumask_var_t where the cpumask is returned @@ -135,6 +141,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) } EXPORT_SYMBOL(alloc_cpumask_var); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var(mask, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(zalloc_cpumask_var); + /** * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena. * @mask: pointer to cpumask_var_t where the cpumask is returned diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 69da09a085a1..ad65fc0317d9 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -23,9 +23,11 @@ #include <linux/dma-debug.h> #include <linux/spinlock.h> #include <linux/debugfs.h> +#include <linux/uaccess.h> #include <linux/device.h> #include <linux/types.h> #include <linux/sched.h> +#include <linux/ctype.h> #include <linux/list.h> #include <linux/slab.h> @@ -85,6 +87,7 @@ static u32 show_num_errors = 1; static u32 num_free_entries; static u32 min_free_entries; +static u32 nr_total_entries; /* number of preallocated entries requested by kernel cmdline */ static u32 req_entries; @@ -97,6 +100,16 @@ static struct dentry *show_all_errors_dent __read_mostly; static struct dentry *show_num_errors_dent __read_mostly; static struct dentry *num_free_entries_dent __read_mostly; static struct dentry *min_free_entries_dent __read_mostly; +static struct dentry *filter_dent __read_mostly; + +/* per-driver filter related state */ + +#define NAME_MAX_LEN 64 + +static char current_driver_name[NAME_MAX_LEN] __read_mostly; +static struct device_driver *current_driver __read_mostly; + +static DEFINE_RWLOCK(driver_name_lock); static const char *type2name[4] = { "single", "page", "scather-gather", "coherent" }; @@ -104,6 +117,11 @@ static const char *type2name[4] = { "single", "page", static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", "DMA_FROM_DEVICE", "DMA_NONE" }; +/* little merge helper - remove it after the merge window */ +#ifndef BUS_NOTIFY_UNBOUND_DRIVER +#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 +#endif + /* * The access to some variables in this macro is racy. We can't use atomic_t * here because all these variables are exported to debugfs. Some of them even @@ -121,15 +139,54 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry) { #ifdef CONFIG_STACKTRACE if (entry) { - printk(KERN_WARNING "Mapped at:\n"); + pr_warning("Mapped at:\n"); print_stack_trace(&entry->stacktrace, 0); } #endif } +static bool driver_filter(struct device *dev) +{ + struct device_driver *drv; + unsigned long flags; + bool ret; + + /* driver filter off */ + if (likely(!current_driver_name[0])) + return true; + + /* driver filter on and initialized */ + if (current_driver && dev->driver == current_driver) + return true; + + if (current_driver || !current_driver_name[0]) + return false; + + /* driver filter on but not yet initialized */ + drv = get_driver(dev->driver); + if (!drv) + return false; + + /* lock to protect against change of current_driver_name */ + read_lock_irqsave(&driver_name_lock, flags); + + ret = false; + if (drv->name && + strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { + current_driver = drv; + ret = true; + } + + read_unlock_irqrestore(&driver_name_lock, flags); + put_driver(drv); + + return ret; +} + #define err_printk(dev, entry, format, arg...) do { \ error_count += 1; \ - if (show_all_errors || show_num_errors > 0) { \ + if (driver_filter(dev) && \ + (show_all_errors || show_num_errors > 0)) { \ WARN(1, "%s %s: " format, \ dev_driver_string(dev), \ dev_name(dev) , ## arg); \ @@ -185,15 +242,50 @@ static void put_hash_bucket(struct hash_bucket *bucket, static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, struct dma_debug_entry *ref) { - struct dma_debug_entry *entry; + struct dma_debug_entry *entry, *ret = NULL; + int matches = 0, match_lvl, last_lvl = 0; list_for_each_entry(entry, &bucket->list, list) { - if ((entry->dev_addr == ref->dev_addr) && - (entry->dev == ref->dev)) + if ((entry->dev_addr != ref->dev_addr) || + (entry->dev != ref->dev)) + continue; + + /* + * Some drivers map the same physical address multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therfore we implement a + * best-fit algorithm here which returns the entry from + * the hash which fits best to the reference value + * instead of the first-fit. + */ + matches += 1; + match_lvl = 0; + entry->size == ref->size ? ++match_lvl : match_lvl; + entry->type == ref->type ? ++match_lvl : match_lvl; + entry->direction == ref->direction ? ++match_lvl : match_lvl; + + if (match_lvl == 3) { + /* perfect-fit - return the result */ return entry; + } else if (match_lvl > last_lvl) { + /* + * We found an entry that fits better then the + * previous one + */ + last_lvl = match_lvl; + ret = entry; + } } - return NULL; + /* + * If we have multiple matches but no perfect-fit, just return + * NULL. + */ + ret = (matches == 1) ? ret : NULL; + + return ret; } /* @@ -257,6 +349,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) put_hash_bucket(bucket, &flags); } +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + /* struct dma_entry allocator * * The next two functions implement the allocator for @@ -270,15 +377,12 @@ static struct dma_debug_entry *dma_entry_alloc(void) spin_lock_irqsave(&free_entries_lock, flags); if (list_empty(&free_entries)) { - printk(KERN_ERR "DMA-API: debugging out of memory " - "- disabling\n"); + pr_err("DMA-API: debugging out of memory - disabling\n"); global_disable = true; goto out; } - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); + entry = __dma_entry_alloc(); #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; @@ -286,9 +390,6 @@ static struct dma_debug_entry *dma_entry_alloc(void) entry->stacktrace.skip = 2; save_stack_trace(&entry->stacktrace); #endif - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; out: spin_unlock_irqrestore(&free_entries_lock, flags); @@ -310,6 +411,53 @@ static void dma_entry_free(struct dma_debug_entry *entry) spin_unlock_irqrestore(&free_entries_lock, flags); } +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_debug_resize_entries); + /* * DMA-API debugging init code * @@ -334,8 +482,7 @@ static int prealloc_memory(u32 num_entries) num_free_entries = num_entries; min_free_entries = num_entries; - printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", - num_entries); + pr_info("DMA-API: preallocated %d debug entries\n", num_entries); return 0; @@ -349,11 +496,102 @@ out_err: return -ENOMEM; } +static ssize_t filter_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN + 1]; + unsigned long flags; + int len; + + if (!current_driver_name[0]) + return 0; + + /* + * We can't copy to userspace directly because current_driver_name can + * only be read under the driver_name_lock with irqs disabled. So + * create a temporary copy first. + */ + read_lock_irqsave(&driver_name_lock, flags); + len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); + read_unlock_irqrestore(&driver_name_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static ssize_t filter_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN]; + unsigned long flags; + size_t len; + int i; + + /* + * We can't copy from userspace directly. Access to + * current_driver_name is protected with a write_lock with irqs + * disabled. Since copy_from_user can fault and may sleep we + * need to copy to temporary buffer first + */ + len = min(count, (size_t)(NAME_MAX_LEN - 1)); + if (copy_from_user(buf, userbuf, len)) + return -EFAULT; + + buf[len] = 0; + + write_lock_irqsave(&driver_name_lock, flags); + + /* + * Now handle the string we got from userspace very carefully. + * The rules are: + * - only use the first token we got + * - token delimiter is everything looking like a space + * character (' ', '\n', '\t' ...) + * + */ + if (!isalnum(buf[0])) { + /* + * If the first character userspace gave us is not + * alphanumerical then assume the filter should be + * switched off. + */ + if (current_driver_name[0]) + pr_info("DMA-API: switching off dma-debug driver filter\n"); + current_driver_name[0] = 0; + current_driver = NULL; + goto out_unlock; + } + + /* + * Now parse out the first token and use it as the name for the + * driver to filter for. + */ + for (i = 0; i < NAME_MAX_LEN; ++i) { + current_driver_name[i] = buf[i]; + if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) + break; + } + current_driver_name[i] = 0; + current_driver = NULL; + + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + +out_unlock: + write_unlock_irqrestore(&driver_name_lock, flags); + + return count; +} + +const struct file_operations filter_fops = { + .read = filter_read, + .write = filter_write, +}; + static int dma_debug_fs_init(void) { dma_debug_dent = debugfs_create_dir("dma-api", NULL); if (!dma_debug_dent) { - printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); + pr_err("DMA-API: can not create debugfs directory\n"); return -ENOMEM; } @@ -392,6 +630,11 @@ static int dma_debug_fs_init(void) if (!min_free_entries_dent) goto out_err; + filter_dent = debugfs_create_file("driver_filter", 0644, + dma_debug_dent, NULL, &filter_fops); + if (!filter_dent) + goto out_err; + return 0; out_err: @@ -400,9 +643,64 @@ out_err: return -ENOMEM; } +static int device_dma_allocations(struct device *dev) +{ + struct dma_debug_entry *entry; + unsigned long flags; + int count = 0, i; + + local_irq_save(flags); + + for (i = 0; i < HASH_SIZE; ++i) { + spin_lock(&dma_entry_hash[i].lock); + list_for_each_entry(entry, &dma_entry_hash[i].list, list) { + if (entry->dev == dev) + count += 1; + } + spin_unlock(&dma_entry_hash[i].lock); + } + + local_irq_restore(flags); + + return count; +} + +static int dma_debug_device_change(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + int count; + + + switch (action) { + case BUS_NOTIFY_UNBOUND_DRIVER: + count = device_dma_allocations(dev); + if (count == 0) + break; + err_printk(dev, NULL, "DMA-API: device driver has pending " + "DMA allocations while released from device " + "[count=%d]\n", count); + break; + default: + break; + } + + return 0; +} + void dma_debug_add_bus(struct bus_type *bus) { - /* FIXME: register notifier */ + struct notifier_block *nb; + + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (nb == NULL) { + pr_err("dma_debug_add_bus: out of memory\n"); + return; + } + + nb->notifier_call = dma_debug_device_change; + + bus_register_notifier(bus, nb); } /* @@ -421,8 +719,7 @@ void dma_debug_init(u32 num_entries) } if (dma_debug_fs_init() != 0) { - printk(KERN_ERR "DMA-API: error creating debugfs entries " - "- disabling\n"); + pr_err("DMA-API: error creating debugfs entries - disabling\n"); global_disable = true; return; @@ -432,14 +729,15 @@ void dma_debug_init(u32 num_entries) num_entries = req_entries; if (prealloc_memory(num_entries) != 0) { - printk(KERN_ERR "DMA-API: debugging out of memory error " - "- disabled\n"); + pr_err("DMA-API: debugging out of memory error - disabled\n"); global_disable = true; return; } - printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); + nr_total_entries = num_free_entries; + + pr_info("DMA-API: debugging enabled by kernel config\n"); } static __init int dma_debug_cmdline(char *str) @@ -448,8 +746,7 @@ static __init int dma_debug_cmdline(char *str) return -EINVAL; if (strncmp(str, "off", 3) == 0) { - printk(KERN_INFO "DMA-API: debugging disabled on kernel " - "command line\n"); + pr_info("DMA-API: debugging disabled on kernel command line\n"); global_disable = true; } @@ -723,15 +1020,15 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; entry->paddr = sg_phys(s); - entry->size = s->length; - entry->dev_addr = s->dma_address; + entry->size = sg_dma_len(s); + entry->dev_addr = sg_dma_address(s); entry->direction = direction; entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; if (!PageHighMem(sg_page(s))) { check_for_stack(dev, sg_virt(s)); - check_for_illegal_area(dev, sg_virt(s), s->length); + check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); } add_dma_entry(entry); @@ -739,13 +1036,33 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(debug_dma_map_sg); +static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s) +{ + struct dma_debug_entry *entry, ref; + struct hash_bucket *bucket; + unsigned long flags; + int mapped_ents; + + ref.dev = dev; + ref.dev_addr = sg_dma_address(s); + ref.size = sg_dma_len(s), + + bucket = get_hash_bucket(&ref, &flags); + entry = hash_bucket_find(bucket, &ref); + mapped_ents = 0; + + if (entry) + mapped_ents = entry->sg_mapped_ents; + put_hash_bucket(bucket, &flags); + + return mapped_ents; +} + void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { - struct dma_debug_entry *entry; struct scatterlist *s; int mapped_ents = 0, i; - unsigned long flags; if (unlikely(global_disable)) return; @@ -756,8 +1073,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, .type = dma_debug_sg, .dev = dev, .paddr = sg_phys(s), - .dev_addr = s->dma_address, - .size = s->length, + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), .direction = dir, .sg_call_ents = 0, }; @@ -765,14 +1082,9 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, if (mapped_ents && i >= mapped_ents) break; - if (mapped_ents == 0) { - struct hash_bucket *bucket; + if (!i) { ref.sg_call_ents = nelems; - bucket = get_hash_bucket(&ref, &flags); - entry = hash_bucket_find(bucket, &ref); - if (entry) - mapped_ents = entry->sg_mapped_ents; - put_hash_bucket(bucket, &flags); + mapped_ents = get_nr_mapped_entries(dev, s); } check_unmap(&ref); @@ -874,14 +1186,20 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) { struct scatterlist *s; - int i; + int mapped_ents = 0, i; if (unlikely(global_disable)) return; for_each_sg(sg, s, nelems, i) { - check_sync(dev, s->dma_address, s->dma_length, 0, - direction, true); + if (!i) + mapped_ents = get_nr_mapped_entries(dev, s); + + if (i >= mapped_ents) + break; + + check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0, + direction, true); } } EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); @@ -890,15 +1208,39 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction) { struct scatterlist *s; - int i; + int mapped_ents = 0, i; if (unlikely(global_disable)) return; for_each_sg(sg, s, nelems, i) { - check_sync(dev, s->dma_address, s->dma_length, 0, - direction, false); + if (!i) + mapped_ents = get_nr_mapped_entries(dev, s); + + if (i >= mapped_ents) + break; + + check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0, + direction, false); } } EXPORT_SYMBOL(debug_dma_sync_sg_for_device); +static int __init dma_debug_driver_setup(char *str) +{ + int i; + + for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { + current_driver_name[i] = *str; + if (*str == 0) + break; + } + + if (current_driver_name[0]) + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + + + return 1; +} +__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 2b0b5a7d2ced..bffe6d7ef9d9 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -60,8 +60,8 @@ enum dma_sync_target { int swiotlb_force; /* - * Used to do a quick range check in swiotlb_unmap_single and - * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in unmap_single and + * sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } @@ -140,9 +140,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); } -static void *swiotlb_bus_to_virt(dma_addr_t address) +void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address) { - return phys_to_virt(swiotlb_bus_to_phys(address)); + return phys_to_virt(swiotlb_bus_to_phys(hwdev, address)); +} + +int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, + dma_addr_t addr, size_t size) +{ + return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) @@ -309,10 +315,10 @@ cleanup1: return -ENOMEM; } -static int +static inline int address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) { - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return swiotlb_arch_address_needs_mapping(hwdev, addr, size); } static inline int range_needs_mapping(phys_addr_t paddr, size_t size) @@ -341,7 +347,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, unsigned long flags; while (size) { - sz = min(PAGE_SIZE - offset, size); + sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); buffer = kmap_atomic(pfn_to_page(pfn), @@ -476,7 +482,7 @@ found: * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ static void -unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -560,7 +566,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, size)) { /* * The allocated memory isn't reachable by the device. - * Fall back on swiotlb_map_single(). */ free_pages((unsigned long) ret, order); ret = NULL; @@ -568,9 +573,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, if (!ret) { /* * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on - * swiotlb_map_single(), which will grab memory from - * the lowest available address range. + * to GFP_DMA memory; fall back on map_single(), which + * will grab memory from the lowest available address range. */ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); if (!ret) @@ -587,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -604,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, free_pages((unsigned long) vaddr, get_order(size)); else /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -634,7 +638,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * physical address to use is returned. * * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. */ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, @@ -642,18 +646,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - void *ptr = page_address(page) + offset; dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); void *map; BUG_ON(dir == DMA_NONE); /* - * If the pointer passed in happens to be in the device's DMA window, + * If the address happens to be in the device's DMA window, * we can safely return the device addr and not worry about bounce * buffering it. */ if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(virt_to_phys(ptr), size)) + !range_needs_mapping(phys, size)) return dev_addr; /* @@ -679,23 +682,35 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_single call. All + * match what was provided for in a previous swiotlb_map_page call. All * other usages are undefined. * * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ +static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) +{ + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); + + BUG_ON(dir == DMA_NONE); + + if (is_swiotlb_buffer(dma_addr)) { + do_unmap_single(hwdev, dma_addr, size, dir); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(dma_addr, size); +} + void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); - - BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) - unmap_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + unmap_single(hwdev, dev_addr, size, dir); } EXPORT_SYMBOL_GPL(swiotlb_unmap_page); @@ -703,7 +718,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); * Make physical memory consistent for a single streaming mode DMA translation * after a transfer. * - * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * If you perform a swiotlb_map_page() but wish to interrogate the buffer * using the cpu, yet do not wish to teardown the dma mapping, you must * call this function before doing so. At the next point you give the dma * address back to the card, you must first perform a @@ -713,13 +728,19 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) + + if (is_swiotlb_buffer(dma_addr)) { sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(dma_addr, size); } void @@ -746,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset; - - BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) - sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); } void @@ -777,7 +792,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_single + * This is the scatter-gather version of the above swiotlb_map_page * interface. Here the scatter gather list elements are each tagged with the * appropriate dma address and length. They are obtained via * sg_dma_{address,length}(SG). @@ -788,7 +803,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); * The routine returns the number of addr/length pairs actually * used, at most nents. * - * Device ownership issues as mentioned above for swiotlb_map_single are the + * Device ownership issues as mentioned above for swiotlb_map_page are the * same here. */ int @@ -836,7 +851,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_single() above. + * concerning calls here are the same as for swiotlb_unmap_page() above. */ void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, @@ -847,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); - for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) - unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), - sg->dma_length, dir); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); - } + for_each_sg(sgl, sg, nelems, i) + unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); @@ -879,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, struct scatterlist *sg; int i; - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) - sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + for_each_sg(sgl, sg, nelems, i) + swiotlb_sync_single(hwdev, sg->dma_address, sg->dma_length, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); - } } void diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7536acea135b..756ccafa9cec 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -408,6 +408,8 @@ enum format_type { FORMAT_TYPE_LONG_LONG, FORMAT_TYPE_ULONG, FORMAT_TYPE_LONG, + FORMAT_TYPE_UBYTE, + FORMAT_TYPE_BYTE, FORMAT_TYPE_USHORT, FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, @@ -573,12 +575,15 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec) } static char *symbol_string(char *buf, char *end, void *ptr, - struct printf_spec spec) + struct printf_spec spec, char ext) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - sprint_symbol(sym, value); + if (ext != 'f') + sprint_symbol(sym, value); + else + kallsyms_lookup(value, NULL, NULL, NULL, sym); return string(buf, end, sym, spec); #else spec.field_width = 2*sizeof(void *); @@ -690,7 +695,8 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, * * Right now we handle: * - * - 'F' For symbolic function descriptor pointers + * - 'F' For symbolic function descriptor pointers with offset + * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers * - 'R' For a struct resource pointer, it prints the range of * addresses (not the name nor the flags) @@ -713,10 +719,11 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, switch (*fmt) { case 'F': + case 'f': ptr = dereference_function_descriptor(ptr); /* Fallthrough */ case 'S': - return symbol_string(buf, end, ptr, spec); + return symbol_string(buf, end, ptr, spec, *fmt); case 'R': return resource_string(buf, end, ptr, spec); case 'm': @@ -853,11 +860,15 @@ qualifier: spec->qualifier = -1; if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { - spec->qualifier = *fmt; - ++fmt; - if (spec->qualifier == 'l' && *fmt == 'l') { - spec->qualifier = 'L'; - ++fmt; + spec->qualifier = *fmt++; + if (unlikely(spec->qualifier == *fmt)) { + if (spec->qualifier == 'l') { + spec->qualifier = 'L'; + ++fmt; + } else if (spec->qualifier == 'h') { + spec->qualifier = 'H'; + ++fmt; + } } } @@ -919,6 +930,11 @@ qualifier: spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; + } else if (spec->qualifier == 'H') { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_BYTE; + else + spec->type = FORMAT_TYPE_UBYTE; } else if (spec->qualifier == 'h') { if (spec->flags & SIGN) spec->type = FORMAT_TYPE_SHORT; @@ -943,7 +959,8 @@ qualifier: * * This function follows C99 vsnprintf, but has some extensions: * %pS output the name of a text symbol - * %pF output the name of a function pointer + * %pF output the name of a function pointer with its offset + * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource * * The return value is the number of characters which would @@ -1087,6 +1104,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + num = (unsigned char) va_arg(args, int); + break; + case FORMAT_TYPE_BYTE: + num = (signed char) va_arg(args, int); + break; case FORMAT_TYPE_USHORT: num = (unsigned short) va_arg(args, int); break; @@ -1363,6 +1386,10 @@ do { \ case FORMAT_TYPE_PTRDIFF: save_arg(ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + case FORMAT_TYPE_BYTE: + save_arg(char); + break; case FORMAT_TYPE_USHORT: case FORMAT_TYPE_SHORT: save_arg(short); @@ -1391,7 +1418,8 @@ EXPORT_SYMBOL_GPL(vbin_printf); * * The format follows C99 vsnprintf, but has some extensions: * %pS output the name of a text symbol - * %pF output the name of a function pointer + * %pF output the name of a function pointer with its offset + * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource * %n is ignored * @@ -1538,6 +1566,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) case FORMAT_TYPE_PTRDIFF: num = get_arg(ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + num = get_arg(unsigned char); + break; + case FORMAT_TYPE_BYTE: + num = get_arg(signed char); + break; case FORMAT_TYPE_USHORT: num = get_arg(unsigned short); break; diff --git a/mm/filemap.c b/mm/filemap.c index 379ff0bcbf6e..1b60f30cebfa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page) mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); BUG_ON(page_mapped(page)); - mem_cgroup_uncharge_cache_page(page); /* * Some filesystems seem to re-dirty the page even after @@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page) spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } static int sync_page(void *word) @@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); } - - spin_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else mem_cgroup_uncharge_cache_page(page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 28c655ba9353..e83ad2c9228c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref) static struct resv_map *vma_resv_map(struct vm_area_struct *vma) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) return (struct resv_map *)(get_vma_private_data(vma) & ~HPAGE_RESV_MASK); return NULL; @@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma) static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - VM_BUG_ON(vma->vm_flags & VM_SHARED); + VM_BUG_ON(vma->vm_flags & VM_MAYSHARE); set_vma_private_data(vma, (get_vma_private_data(vma) & HPAGE_RESV_MASK) | (unsigned long)map); @@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - VM_BUG_ON(vma->vm_flags & VM_SHARED); + VM_BUG_ON(vma->vm_flags & VM_MAYSHARE); set_vma_private_data(vma, get_vma_private_data(vma) | flags); } @@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h, if (vma->vm_flags & VM_NORESERVE) return; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { /* Shared mappings always use reserves */ h->resv_huge_pages--; } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { @@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h, void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) vma->vm_private_data = (void *)0; } /* Returns true if the VMA has associated reserve pages */ static int vma_has_reserves(struct vm_area_struct *vma) { - if (vma->vm_flags & VM_SHARED) + if (vma->vm_flags & VM_MAYSHARE) return 1; if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) return 1; @@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h, struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { pgoff_t idx = vma_hugecache_offset(h, vma, addr); return region_chg(&inode->i_mapping->private_list, idx, idx + 1); @@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h, struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { pgoff_t idx = vma_hugecache_offset(h, vma, addr); region_add(&inode->i_mapping->private_list, idx, idx + 1); @@ -1893,7 +1893,7 @@ retry_avoidcopy: * at the time of fork() could consume its reserves on COW instead * of the full address range. */ - if (!(vma->vm_flags & VM_SHARED) && + if (!(vma->vm_flags & VM_MAYSHARE) && is_vma_resv_set(vma, HPAGE_RESV_OWNER) && old_page != pagecache_page) outside_reserve = 1; @@ -2000,7 +2000,7 @@ retry: clear_huge_page(page, address, huge_page_size(h)); __SetPageUptodate(page); - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { int err; struct inode *inode = mapping->host; @@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto out_mutex; } - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) pagecache_page = hugetlbfs_pagecache_page(h, vma, address); } @@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode, * to reserve the full area even if read-only as mprotect() may be * called to make the mapping read-write. Assume !vma is a shm mapping */ - if (!vma || vma->vm_flags & VM_SHARED) + if (!vma || vma->vm_flags & VM_MAYSHARE) chg = region_chg(&inode->i_mapping->private_list, from, to); else { struct resv_map *resv_map = resv_map_alloc(); @@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode, * consumed reservations are stored in the map. Hence, nothing * else has to be done for private mappings here */ - if (!vma || vma->vm_flags & VM_SHARED) + if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 01c2d8f14685..78eb8552818b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -314,14 +314,6 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return mem; } -static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem) -{ - if (!mem) - return true; - return css_is_removed(&mem->css); -} - - /* * Call callback function against all cgroup under hierarchy tree. */ @@ -932,7 +924,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (unlikely(!mem)) return 0; - VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem)); + VM_BUG_ON(css_is_removed(&mem->css)); while (1) { int ret; @@ -1488,8 +1480,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } +#ifdef CONFIG_SWAP /* - * called from __delete_from_swap_cache() and drop "page" account. + * called after __delete_from_swap_cache() and drop "page" account. * memcg information is recorded to swap_cgroup of "ent" */ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) @@ -1506,6 +1499,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) if (memcg) css_put(&memcg->css); } +#endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* diff --git a/mm/mmzone.c b/mm/mmzone.c index 16ce8b955dcf..f5b7d1760213 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -6,6 +6,7 @@ #include <linux/stddef.h> +#include <linux/mm.h> #include <linux/mmzone.h> #include <linux/module.h> @@ -72,3 +73,17 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, *zone = zonelist_zone(z); return z; } + +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + if (page_to_pfn(page) != pfn) + return 0; + + if (page_zone(page) != zone) + return 0; + + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 92bcf1db16b2..a7b2460e922b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -284,22 +284,28 @@ static void dump_tasks(const struct mem_cgroup *mem) printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj " "name\n"); do_each_thread(g, p) { - /* - * total_vm and rss sizes do not exist for tasks with a - * detached mm so there's no need to report them. - */ - if (!p->mm) - continue; + struct mm_struct *mm; + if (mem && !task_in_mem_cgroup(p, mem)) continue; if (!thread_group_leader(p)) continue; task_lock(p); + mm = p->mm; + if (!mm) { + /* + * total_vm and rss sizes do not exist for tasks with no + * mm so there's no need to report them; they can't be + * oom killed anyway. + */ + task_unlock(p); + continue; + } printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, __task_cred(p)->uid, p->tgid, - p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), - p->oomkilladj, p->comm); + p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm, + get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj, + p->comm); task_unlock(p); } while_each_thread(g, p); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 30351f0063ac..bb553c3e955d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -94,12 +94,12 @@ unsigned long vm_dirty_bytes; /* * The interval between `kupdate'-style writebacks */ -unsigned int dirty_writeback_interval = 5 * 100; /* sentiseconds */ +unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ /* * The longest time for which data is allowed to remain dirty */ -unsigned int dirty_expire_interval = 30 * 100; /* sentiseconds */ +unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ /* * Flag that makes the machine dump writes/reads and block dirtyings. @@ -770,7 +770,7 @@ static void wb_kupdate(unsigned long arg) sync_supers(); - oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval); + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); start_jif = jiffies; next_jif = start_jif + msecs_to_jiffies(dirty_writeback_interval * 10); nr_to_write = global_page_state(NR_FILE_DIRTY) + diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fe753ecf2aa5..474c7e9dd51a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve; static int __meminitdata nr_nodemap_entries; static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES]; - static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES]; -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ static unsigned long __initdata required_kernelcore; static unsigned long __initdata required_movablecore; static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; @@ -3103,64 +3099,6 @@ void __init sparse_memory_present_with_active_regions(int nid) } /** - * push_node_boundaries - Push node boundaries to at least the requested boundary - * @nid: The nid of the node to push the boundary for - * @start_pfn: The start pfn of the node - * @end_pfn: The end pfn of the node - * - * In reserve-based hot-add, mem_map is allocated that is unused until hotadd - * time. Specifically, on x86_64, SRAT will report ranges that can potentially - * be hotplugged even though no physical memory exists. This function allows - * an arch to push out the node boundaries so mem_map is allocated that can - * be used later. - */ -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering push_node_boundaries(%u, %lu, %lu)\n", - nid, start_pfn, end_pfn); - - /* Initialise the boundary for this node if necessary */ - if (node_boundary_end_pfn[nid] == 0) - node_boundary_start_pfn[nid] = -1UL; - - /* Update the boundaries */ - if (node_boundary_start_pfn[nid] > start_pfn) - node_boundary_start_pfn[nid] = start_pfn; - if (node_boundary_end_pfn[nid] < end_pfn) - node_boundary_end_pfn[nid] = end_pfn; -} - -/* If necessary, push the node boundary out for reserve hotadd */ -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) -{ - mminit_dprintk(MMINIT_TRACE, "zoneboundary", - "Entering account_node_boundary(%u, %lu, %lu)\n", - nid, *start_pfn, *end_pfn); - - /* Return if boundary information has not been provided */ - if (node_boundary_end_pfn[nid] == 0) - return; - - /* Check the boundaries and update if necessary */ - if (node_boundary_start_pfn[nid] < *start_pfn) - *start_pfn = node_boundary_start_pfn[nid]; - if (node_boundary_end_pfn[nid] > *end_pfn) - *end_pfn = node_boundary_end_pfn[nid]; -} -#else -void __init push_node_boundaries(unsigned int nid, - unsigned long start_pfn, unsigned long end_pfn) {} - -static void __meminit account_node_boundary(unsigned int nid, - unsigned long *start_pfn, unsigned long *end_pfn) {} -#endif - - -/** * get_pfn_range_for_nid - Return the start and end page frames for a node * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned. * @start_pfn: Passed by reference. On return, it will have the node start_pfn. @@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid, if (*start_pfn == -1UL) *start_pfn = 0; - - /* Push the node boundaries out if requested */ - account_node_boundary(nid, start_pfn, end_pfn); } /* @@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void) { memset(early_node_map, 0, sizeof(early_node_map)); nr_nodemap_entries = 0; -#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE - memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn)); - memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn)); -#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */ } /* Compare two active node_active_regions */ diff --git a/mm/percpu.c b/mm/percpu.c index 1aa5d8fbca12..c0b2c1a76e81 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -23,7 +23,7 @@ * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers UNIT_SIZE apart. + * percpu base registers pcpu_unit_size apart. * * There are usually many small percpu allocations many of them as * small as 4 bytes. The allocator organizes chunks into lists @@ -38,8 +38,8 @@ * region and negative allocated. Allocation inside a chunk is done * by scanning this map sequentially and serving the first matching * entry. This is mostly copied from the percpu_modalloc() allocator. - * Chunks are also linked into a rb tree to ease address to chunk - * mapping during free. + * Chunks can be determined from the address using the index field + * in the page struct. The index field contains a pointer to the chunk. * * To use this allocator, arch code should do the followings. * @@ -61,7 +61,6 @@ #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/pfn.h> -#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/vmalloc.h> @@ -88,7 +87,6 @@ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ - struct rb_node rb_node; /* key is chunk->vm->addr */ int free_size; /* free bytes in the chunk */ int contig_hint; /* max contiguous size hint */ struct vm_struct *vm; /* mapped vmalloc region */ @@ -110,9 +108,21 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* optional reserved chunk, only accessible for reserved allocations */ +/* + * The first chunk which always exists. Note that unlike other + * chunks, this one can be allocated and mapped in several different + * ways and thus often doesn't live in the vmalloc area. + */ +static struct pcpu_chunk *pcpu_first_chunk; + +/* + * Optional reserved chunk. This chunk reserves part of the first + * chunk and serves it for reserved allocations. The amount of + * reserved offset is in pcpu_reserved_chunk_limit. When reserved + * area doesn't exist, the following variables contain NULL and 0 + * respectively. + */ static struct pcpu_chunk *pcpu_reserved_chunk; -/* offset limit of the reserved chunk */ static int pcpu_reserved_chunk_limit; /* @@ -121,7 +131,7 @@ static int pcpu_reserved_chunk_limit; * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former * protects allocation/reclaim paths, chunks and chunk->page arrays. * The latter is a spinlock and protects the index data structures - - * chunk slots, rbtree, chunks and area maps in chunks. + * chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -140,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ -static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ /* reclaim work to release fully free chunks, scheduled from free path */ static void pcpu_reclaim(struct work_struct *work); @@ -191,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; } +/* set the pointer to a chunk in a page struct */ +static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) +{ + page->index = (unsigned long)pcpu; +} + +/* obtain pointer to a chunk from a page struct */ +static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) +{ + return (struct pcpu_chunk *)page->index; +} + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -257,93 +278,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) } } -static struct rb_node **pcpu_chunk_rb_search(void *addr, - struct rb_node **parentp) -{ - struct rb_node **p = &pcpu_addr_root.rb_node; - struct rb_node *parent = NULL; - struct pcpu_chunk *chunk; - - while (*p) { - parent = *p; - chunk = rb_entry(parent, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) - p = &(*p)->rb_left; - else if (addr > chunk->vm->addr) - p = &(*p)->rb_right; - else - break; - } - - if (parentp) - *parentp = parent; - return p; -} - /** - * pcpu_chunk_addr_search - search for chunk containing specified address - * @addr: address to search for - * - * Look for chunk which might contain @addr. More specifically, it - * searchs for the chunk with the highest start address which isn't - * beyond @addr. - * - * CONTEXT: - * pcpu_lock. + * pcpu_chunk_addr_search - determine chunk containing specified address + * @addr: address for which the chunk needs to be determined. * * RETURNS: * The address of the found chunk. */ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) { - struct rb_node *n, *parent; - struct pcpu_chunk *chunk; + void *first_start = pcpu_first_chunk->vm->addr; - /* is it in the reserved chunk? */ - if (pcpu_reserved_chunk) { - void *start = pcpu_reserved_chunk->vm->addr; - - if (addr >= start && addr < start + pcpu_reserved_chunk_limit) + /* is it in the first chunk? */ + if (addr >= first_start && addr < first_start + pcpu_chunk_size) { + /* is it in the reserved area? */ + if (addr < first_start + pcpu_reserved_chunk_limit) return pcpu_reserved_chunk; + return pcpu_first_chunk; } - /* nah... search the regular ones */ - n = *pcpu_chunk_rb_search(addr, &parent); - if (!n) { - /* no exactly matching chunk, the parent is the closest */ - n = parent; - BUG_ON(!n); - } - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) { - /* the parent was the next one, look for the previous one */ - n = rb_prev(n); - BUG_ON(!n); - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - } - - return chunk; -} - -/** - * pcpu_chunk_addr_insert - insert chunk into address rb tree - * @new: chunk to insert - * - * Insert @new into address rb tree. - * - * CONTEXT: - * pcpu_lock. - */ -static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) -{ - struct rb_node **p, *parent; - - p = pcpu_chunk_rb_search(new->vm->addr, &parent); - BUG_ON(*p); - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, &pcpu_addr_root); + return pcpu_get_page_chunk(vmalloc_to_page(addr)); } /** @@ -755,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) alloc_mask, 0); if (!*pagep) goto err; + pcpu_set_page_chunk(*pagep, chunk); } } @@ -879,7 +834,6 @@ restart: spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); - pcpu_chunk_addr_insert(chunk); goto restart; area_found: @@ -968,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work) if (chunk == list_first_entry(head, struct pcpu_chunk, list)) continue; - rb_erase(&chunk->rb_node, &pcpu_addr_root); list_move(&chunk->list, &todo); } @@ -1147,7 +1100,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (reserved_size) { schunk->free_size = reserved_size; - pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ + pcpu_reserved_chunk = schunk; + pcpu_reserved_chunk_limit = static_size + reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic area covered */ @@ -1158,8 +1112,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; - pcpu_reserved_chunk_limit = static_size + schunk->free_size; - /* init dynamic chunk if necessary */ if (dyn_size) { dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); @@ -1226,13 +1178,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, } /* link the first chunk in */ - if (!dchunk) { - pcpu_chunk_relocate(schunk, -1); - pcpu_chunk_addr_insert(schunk); - } else { - pcpu_chunk_relocate(dchunk, -1); - pcpu_chunk_addr_insert(dchunk); - } + pcpu_first_chunk = dchunk ?: schunk; + pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); diff --git a/mm/rmap.c b/mm/rmap.c index 16521664010d..23122af32611 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -14,7 +14,7 @@ * Original design by Rik van Riel <riel@conectiva.com.br> 2001 * File methods by Dave McCracken <dmccr@us.ibm.com> 2003, 2004 * Anonymous methods by Andrea Arcangeli <andrea@suse.de> 2004 - * Contributions by Hugh Dickins <hugh@veritas.com> 2003, 2004 + * Contributions by Hugh Dickins 2003, 2004 */ /* diff --git a/mm/slob.c b/mm/slob.c index 494f05f19417..9b1737b0787b 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -60,6 +60,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/swap.h> /* struct reclaim_state */ #include <linux/cache.h> #include <linux/init.h> #include <linux/module.h> @@ -255,6 +256,8 @@ static void *slob_new_pages(gfp_t gfp, int order, int node) static void slob_free_pages(void *b, int order) { + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += 1 << order; free_pages((unsigned long)b, order); } @@ -407,7 +410,7 @@ static void slob_free(void *block, int size) spin_unlock_irqrestore(&slob_lock, flags); clear_slob_page(sp); free_slob_page(sp); - free_page((unsigned long)b); + slob_free_pages(b, 0); return; } diff --git a/mm/slub.c b/mm/slub.c index ea9e7160e2e7..5e805a6fe36c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -9,6 +9,7 @@ */ #include <linux/mm.h> +#include <linux/swap.h> /* struct reclaim_state */ #include <linux/module.h> #include <linux/bit_spinlock.h> #include <linux/interrupt.h> @@ -1170,6 +1171,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlab(page); reset_page_mapcount(page); + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += pages; __free_pages(page, order); } @@ -1909,7 +1912,7 @@ static inline int calculate_order(int size) * Doh this slab cannot be placed using slub_max_order. */ order = slab_order(size, 1, MAX_ORDER, 1); - if (order <= MAX_ORDER) + if (order < MAX_ORDER) return order; return -ENOSYS; } @@ -2522,6 +2525,7 @@ __setup("slub_min_order=", setup_slub_min_order); static int __init setup_slub_max_order(char *str) { get_option(&str, &slub_max_order); + slub_max_order = min(slub_max_order, MAX_ORDER - 1); return 1; } diff --git a/mm/swap_state.c b/mm/swap_state.c index 3ecea98ecb45..1416e7e9e02d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { - swp_entry_t ent = {.val = page_private(page)}; - VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); @@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page) total_swapcache_pages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); - mem_cgroup_uncharge_swapcache(page, ent); } /** @@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&swapper_space.tree_lock); + mem_cgroup_uncharge_swapcache(page, entry); swap_free(entry); page_cache_release(page); } diff --git a/mm/truncate.c b/mm/truncate.c index 55206fab7b99..12e1579f9165 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); page_cache_release(page); /* pagecache ref */ return 1; failed: diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fa3eda1f03f..d254306562cd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_swapcache(page, swap); swap_free(swap); } else { __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } return 1; diff --git a/mm/vmstat.c b/mm/vmstat.c index 66f6130976cb..74d66dba0cbe 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -509,22 +509,11 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, continue; page = pfn_to_page(pfn); -#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES - /* - * Ordinarily, memory holes in flatmem still have a valid - * memmap for the PFN range. However, an architecture for - * embedded systems (e.g. ARM) can free up the memmap backing - * holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even - * though pfn_valid() returns true. Skip the page if the - * linkages are broken. Even if this test passed, the impact - * is that the counters for the movable type are off but - * fragmentation monitoring is likely meaningless on small - * systems. - */ - if (page_zone(page) != zone) + + /* Watch for unexpected holes punched in the memmap */ + if (!memmap_valid_within(pfn, page, zone)) continue; -#endif + mtype = get_pageblock_migratetype(page); if (mtype < MIGRATE_TYPES) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 4cc3624bd22d..95f7a7a544b4 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -90,9 +90,6 @@ static void add_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_add); struct hci_dev *hdev = conn->hdev; - /* ensure previous del is complete */ - flush_work(&conn->work_del); - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { @@ -118,9 +115,6 @@ static void del_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; - /* ensure previous add is complete */ - flush_work(&conn->work_add); - if (!device_is_registered(&conn->dev)) return; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 30b88777c3df..5ee1a3682bf2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; + /* If STP is turned off, then forward */ + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ @@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) return skb; /* continue processing */ } +forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6e63ec3f1fcf..0660515f3992 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); + if (br->stp_enabled != BR_KERNEL_STP) + return; + pr_info("%s: topology change detected, %s\n", br->dev->name, isroot ? "propagating" : "sending tcn bpdu"); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95b109e..6d62d4618cfc 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,9 +126,9 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bdff612..64f51eec6576 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c11..0666a827bc62 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c4906bbcd60e..c2e4fb8f3546 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2288,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9d26a3da37e5..5b919f7b45db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,7 +408,7 @@ config INET_XFRM_MODE_BEET config INET_LRO bool "Large Receive Offload (ipv4/tcp)" - + default y ---help--- Support for Large Receive Offload (ipv4/tcp). diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b6..33c7c85dfe40 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a419..88bf051d0cbb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ +u32 ic_dev_xid; /* Device under configuration */ + /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } + /* Is it a reply for the device we are configuring? */ + if (b->xid != ic_dev_xid) { + if (net_ratelimit()) + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + goto drop_unlock; + } + /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); for (;;) { + /* Track the device we are configuring */ + ic_dev_xid = d->xid; + #ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068a..28205e5bfa9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,8 +784,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + struct rtable *rth, *aux, **rthp; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,8 +809,10 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -819,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1068,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1088,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1134,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1205,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1224,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1d7f49c6f0ca..7a0f0b27bf1f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; + u32 urg_hole = 0; lock_sock(sk); @@ -1532,7 +1533,8 @@ do_prequeue: } } } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if ((flags & MSG_PEEK) && + (peek_seq - copied - urg_hole != tp->copied_seq)) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); @@ -1553,6 +1555,7 @@ do_prequeue: if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; + urg_hole++; offset++; used--; if (!used) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd3..c6743eec9b7d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} + static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_ssthresh > 2 ) { + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ - tp->snd_ssthresh = 2; /* Set cwnd to match the actual rate * exactly: @@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. */ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ @@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * we slow down. */ tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35c..032a5ec391c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8e757dd53396..aee0d6bea309 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -22,6 +22,7 @@ #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> static DEFINE_RWLOCK(dccp_lock); @@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); + if (new_state != old_state) + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e7..97a6e93d742e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct, sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) + sender->td_maxack = ack; + } + /* * Update receiver data. */ @@ -919,6 +927,16 @@ static int tcp_packet(struct nf_conn *ct, return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET + && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) + && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid RST "); + return -NF_ACCEPT; + } + if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fd326ac27ec8..66a6dd5c519a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf, + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + if (in && skb_mac_header_was_set(skb)) { + size += nla_total_size(skb->dev->hard_header_len) + + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + + nla_total_size(sizeof(u_int16_t)); /* hwlen */ + } + spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5b5369c30f9..219dcdbe388c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v) if (!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) - return 1; + return -1; } return 0; } diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d289..67e38a056240 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0759f32e9dca..09cdcdfe7e91 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) unsigned long cl; unsigned long fh; int err; + int tp_created = 0; if (net != &init_net) return -EINVAL; @@ -266,10 +267,7 @@ replay: goto errout; } - spin_lock_bh(root_lock); - tp->next = *back; - *back = tp; - spin_unlock_bh(root_lock); + tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -296,8 +294,11 @@ replay: switch (n->nlmsg_type) { case RTM_NEWTFILTER: err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) { + if (tp_created) + tcf_destroy(tp); goto errout; + } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); @@ -314,8 +315,18 @@ replay: } err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); - if (err == 0) + if (err == 0) { + if (tp_created) { + spin_lock_bh(root_lock); + tp->next = *back; + *back = tp; + spin_unlock_bh(root_lock); + } tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + } else { + if (tp_created) + tcf_destroy(tp); + } errout: if (cl) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 91a3db4a76f8..e5becb92b3e7 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = tp->root; - struct cgroup_cls_state *cs; - int ret = 0; + u32 classid; /* * Due to the nature of the classifier it is required to ignore all @@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; rcu_read_lock(); - cs = task_cls_state(current); - if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { - res->classid = cs->classid; - res->class = 0; - ret = tcf_exts_exec(skb, &head->exts, res); - } else - ret = -1; - + classid = task_cls_state(current)->classid; rcu_read_unlock(); - return ret; + if (!classid) + return -1; + + if (!tcf_em_tree_match(skb, &head->ematches, NULL)) + return -1; + + res->classid = classid; + res->class = 0; + return tcf_exts_exec(skb, &head->exts, res); } static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) @@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; int err; + if (!tca[TCA_OPTIONS]) + return -EINVAL; + if (head == NULL) { if (!handle) return -EINVAL; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ec697cebb63b..3b6418297231 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -303,6 +303,8 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); + if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { @@ -310,8 +312,7 @@ restart: master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += - qdisc_pkt_len(skb); + master->stats.tx_bytes += length; return 0; } __netif_tx_unlock(slave_txq); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c15..9d504234af4a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -345,6 +345,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; release_sock(sock->sk); #endif } @@ -796,6 +797,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); + if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) + /* sndbuf needs to have room for one request + * per thread, otherwise we can stall even when the + * network isn't a bottleneck. + * + * We count all threads rather than threads in a + * particular pool, which provides an upper bound + * on the number of threads which will access the socket. + * + * rcvbuf just needs to be able to hold a few requests. + * Normally they will be removed from the queue + * as soon a a complete request arrives. + */ + svc_sock_setbufsize(svsk->sk_sock, + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + 3 * serv->sv_max_mesg); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* Receive data. If we haven't got the record length yet, get @@ -1043,6 +1061,15 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + /* initialise setting must have enough space to + * receive and respond to one request. + * svc_tcp_recvfrom will re-adjust if necessary + */ + svc_sock_setbufsize(svsk->sk_sock, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); + + set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1112,14 +1139,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else { - /* initialise setting must have enough space to - * receive and respond to one request. - */ - svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, - 4 * serv->sv_max_mesg); + else svc_tcp_init(svsk, serv); - } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 8b510c5e8777..f11be72a1a80 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) @@ -532,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma, clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ + ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, - page, 0, PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_dma_lkey; - /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4b0c2fa15e0b..5151f9f6c573 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_page(xprt->sc_cm_id->device, - page, 0, PAGE_SIZE, + pa = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -1315,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, PAGE_SIZE, DMA_FROM_DEVICE); + sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { put_page(p); return; @@ -1343,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_page(xprt->sc_cm_id->device, + ib_dma_unmap_single(xprt->sc_cm_id->device, sge.addr, PAGE_SIZE, DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b21e0cc5e69..465aafc2007f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; frmr_wr.wr.fast_reg.access_flags = (writing ? - IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08265ca15785..487cb627ddba 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1551,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d80..0e59f9ae9b81 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index cba61ca403ca..2b706617c89a 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -188,20 +188,34 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ # --------------------------------------------------------------------------- quiet_cmd_gzip = GZIP $@ -cmd_gzip = gzip -f -9 < $< > $@ +cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \ + (rm -f $@ ; false) # Bzip2 # --------------------------------------------------------------------------- -# Bzip2 does not include size in file... so we have to fake that -size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size - -quiet_cmd_bzip2 = BZIP2 $@ -cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false) +# Bzip2 and LZMA do not include size in file... so we have to fake that; +# append the size as a 32-bit littleendian number as gzip does. +size_append = echo -ne $(shell \ +dec_size=0; \ +for F in $1; do \ + fsize=$$(stat -c "%s" $$F); \ + dec_size=$$(expr $$dec_size + $$fsize); \ +done; \ +printf "%08x" $$dec_size | \ + sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g' \ +) + +quiet_cmd_bzip2 = BZIP2 $@ +cmd_bzip2 = (cat $(filter-out FORCE,$^) | \ + bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) # Lzma # --------------------------------------------------------------------------- quiet_cmd_lzma = LZMA $@ -cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false) +cmd_lzma = (cat $(filter-out FORCE,$^) | \ + lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) diff --git a/scripts/bin_size b/scripts/bin_size deleted file mode 100644 index 43e1b360cee6..000000000000 --- a/scripts/bin_size +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -if [ $# = 0 ] ; then - echo Usage: $0 file -fi - -size_dec=`stat -c "%s" $1` -size_hex_echo_string=`printf "%08x" $size_dec | - sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'` -/bin/echo -ne $size_hex_echo_string diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 32c8554f3946..00790472f641 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -1,5 +1,13 @@ #!/bin/sh -# Print additional version information for non-release trees. +# +# This scripts adds local version information from the version +# control systems git, mercurial (hg) and subversion (svn). +# +# If something goes wrong, send a mail the kernel build mailinglist +# (see MAINTAINERS) and CC Nico Schottelius +# <nico-linuxsetlocalversion -at- schottelius.org>. +# +# usage() { echo "Usage: $0 [srctree]" >&2 @@ -10,12 +18,20 @@ cd "${1:-.}" || usage # Check for git and a git repo. if head=`git rev-parse --verify --short HEAD 2>/dev/null`; then - # Do we have an untagged tag? - if atag=`git describe 2>/dev/null`; then - echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' - # add -g${head}, if there is no usable tag - else - printf '%s%s' -g $head + + # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore it, + # because this version is defined in the top level Makefile. + if [ -z "`git describe --exact-match 2>/dev/null`" ]; then + + # If we are past a tagged commit (like "v2.6.30-rc5-302-g72357d5"), + # we pretty print it. + if atag="`git describe 2>/dev/null`"; then + echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' + + # If we don't have a tag at all we print -g{commitish}. + else + printf '%s%s' -g $head + fi fi # Is this git on svn? diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ffbe259700b1..510186f0b72e 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry(rcu_dereference(qe->later.next), - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, + struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e03a7e19c73b..11d2cb19d7a6 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) return; } - m = list_entry(rcu_dereference(smk_netlbladdr_list.next), - struct smk_netlbladdr, list); + m = list_entry_rcu(smk_netlbladdr_list.next, + struct smk_netlbladdr, list); /* the comparison '>' is a bit hacky, but works */ if (new->smk_mask.s_addr > m->smk_mask.s_addr) { @@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) list_add_rcu(&new->list, &m->list); return; } - m_next = list_entry(rcu_dereference(m->list.next), - struct smk_netlbladdr, list); + m_next = list_entry_rcu(m->list.next, + struct smk_netlbladdr, list); if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { list_add_rcu(&new->list, &m->list); return; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 5b481912752a..e42be5c4f055 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -27,6 +27,12 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) { + int rc; + + rc = cap_bprm_set_creds(bprm); + if (rc) + return rc; + /* * Do only if this function is called for the first time of an execve * operation. diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 7fbd68fab944..5c48e36038f2 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -1074,7 +1074,7 @@ static unsigned int __devinit aaci_size_fifo(struct aaci *aaci) return i; } -static int __devinit aaci_probe(struct amba_device *dev, void *id) +static int __devinit aaci_probe(struct amba_device *dev, struct amba_id *id) { struct aaci *aaci; int ret, i; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index a2a792c18c40..d659995ac3ac 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -249,6 +249,11 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream) new_hw_ptr = hw_base + pos; } } + + /* Do jiffies check only in xrun_debug mode */ + if (!xrun_debug(substream)) + goto no_jiffies_check; + /* Skip the jiffies check for hardwares with BATCH flag. * Such hardware usually just increases the position at each IRQ, * thus it can't give any strange position. @@ -336,7 +341,9 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) hw_base = 0; new_hw_ptr = hw_base + pos; } - if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) { + /* Do jiffies check only in xrun_debug mode */ + if (xrun_debug(substream) && + ((delta * HZ) / runtime->rate) > jdelta + HZ/100) { hw_ptr_error(substream, "hw_ptr skipping! " "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n", @@ -1478,7 +1485,6 @@ static int snd_pcm_lib_ioctl_reset(struct snd_pcm_substream *substream, runtime->status->hw_ptr %= runtime->buffer_size; else runtime->status->hw_ptr = 0; - runtime->hw_ptr_jiffies = jiffies; snd_pcm_stream_unlock_irqrestore(substream, flags); return 0; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index fc6f98e257df..b5da656d1ece 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -848,6 +848,7 @@ static void snd_pcm_post_start(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_trigger_tstamp(substream); + runtime->hw_ptr_jiffies = jiffies; runtime->status->state = state; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && runtime->silence_size > 0) @@ -961,6 +962,11 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* The jiffies check in snd_pcm_update_hw_ptr*() is done by + * a delta betwen the current jiffies, this gives a large enough + * delta, effectively to skip the check once. + */ + substream->runtime->hw_ptr_jiffies = jiffies - HZ * 1000; return substream->ops->trigger(substream, push ? SNDRV_PCM_TRIGGER_PAUSE_PUSH : SNDRV_PCM_TRIGGER_PAUSE_RELEASE); diff --git a/sound/drivers/pcsp/pcsp_mixer.c b/sound/drivers/pcsp/pcsp_mixer.c index 771955a9be71..199b03377142 100644 --- a/sound/drivers/pcsp/pcsp_mixer.c +++ b/sound/drivers/pcsp/pcsp_mixer.c @@ -51,7 +51,7 @@ static int pcsp_treble_info(struct snd_kcontrol *kcontrol, if (uinfo->value.enumerated.item > chip->max_treble) uinfo->value.enumerated.item = chip->max_treble; sprintf(uinfo->value.enumerated.name, "%lu", - PCSP_CALC_RATE(uinfo->value.enumerated.item)); + (unsigned long)PCSP_CALC_RATE(uinfo->value.enumerated.item)); return 0; } diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 81bc93e5f1e3..7337abdbe4e3 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -958,10 +958,13 @@ static int patch_sigmatel_stac9708_3d(struct snd_ac97 * ac97) } static const struct snd_kcontrol_new snd_ac97_sigmatel_4speaker = -AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch", AC97_SIGMATEL_DAC2INVERT, 2, 1, 0); +AC97_SINGLE("Sigmatel 4-Speaker Stereo Playback Switch", + AC97_SIGMATEL_DAC2INVERT, 2, 1, 0); +/* "Sigmatel " removed due to excessive name length: */ static const struct snd_kcontrol_new snd_ac97_sigmatel_phaseinvert = -AC97_SINGLE("Sigmatel Surround Phase Inversion Playback Switch", AC97_SIGMATEL_DAC2INVERT, 3, 1, 0); +AC97_SINGLE("Surround Phase Inversion Playback Switch", + AC97_SIGMATEL_DAC2INVERT, 3, 1, 0); static const struct snd_kcontrol_new snd_ac97_sigmatel_controls[] = { AC97_SINGLE("Sigmatel DAC 6dB Attenuate", AC97_SIGMATEL_ANALOG, 1, 1, 0), diff --git a/sound/pci/ca0106/ca0106_mixer.c b/sound/pci/ca0106/ca0106_mixer.c index ad2888705d2a..c111efe61c3c 100644 --- a/sound/pci/ca0106/ca0106_mixer.c +++ b/sound/pci/ca0106/ca0106_mixer.c @@ -800,7 +800,7 @@ int __devinit snd_ca0106_mixer(struct snd_ca0106 *emu) "Capture Volume", "External Amplifier", "Sigmatel 4-Speaker Stereo Playback Switch", - "Sigmatel Surround Phase Inversion Playback ", + "Surround Phase Inversion Playback Switch", NULL }; static char *ca0106_rename_ctls[] = { diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 21e99cfa8c49..3128e1a6bc65 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2141,6 +2141,7 @@ static struct snd_pci_quirk probe_mask_list[] __devinitdata = { /* including bogus ALC268 in slot#2 that conflicts with ALC888 */ SND_PCI_QUIRK(0x17c0, 0x4085, "Medion MD96630", 0x01), /* forced codec slots */ + SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), {} }; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 56ce19e68cb5..4fcbe21829ab 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1848,6 +1848,7 @@ static const char *cxt5051_models[CXT5051_MODELS] = { static struct snd_pci_quirk cxt5051_cfg_tbl[] = { SND_PCI_QUIRK(0x103c, 0x30cf, "HP DV6736", CXT5051_HP_DV6736), + SND_PCI_QUIRK(0x103c, 0x360b, "Compaq Presario CQ60", CXT5051_HP), SND_PCI_QUIRK(0x14f1, 0x0101, "Conexant Reference board", CXT5051_LAPTOP), SND_PCI_QUIRK(0x14f1, 0x5051, "HP Spartan 1.1", CXT5051_HP), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b8a0d3e79272..0fd258eba3a5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -776,6 +776,12 @@ static void alc_set_input_pin(struct hda_codec *codec, hda_nid_t nid, pincap = (pincap & AC_PINCAP_VREF) >> AC_PINCAP_VREF_SHIFT; if (pincap & AC_PINCAP_VREF_80) val = PIN_VREF80; + else if (pincap & AC_PINCAP_VREF_50) + val = PIN_VREF50; + else if (pincap & AC_PINCAP_VREF_100) + val = PIN_VREF100; + else if (pincap & AC_PINCAP_VREF_GRD) + val = PIN_VREFGRD; } snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, val); } @@ -12058,6 +12064,7 @@ static struct snd_pci_quirk alc268_cfg_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0253, "Dell OEM", ALC268_DELL), SND_PCI_QUIRK(0x1028, 0x02b0, "Dell Inspiron Mini9", ALC268_DELL), SND_PCI_QUIRK(0x103c, 0x30cc, "TOSHIBA", ALC268_TOSHIBA), + SND_PCI_QUIRK(0x103c, 0x30f1, "HP TX25xx series", ALC268_TOSHIBA), SND_PCI_QUIRK(0x1043, 0x1205, "ASUS W7J", ALC268_3ST), SND_PCI_QUIRK(0x1179, 0xff10, "TOSHIBA A205", ALC268_TOSHIBA), SND_PCI_QUIRK(0x1179, 0xff50, "TOSHIBA A305", ALC268_TOSHIBA), diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 03b3646018a1..d2fd8ef6aef8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -150,6 +150,7 @@ enum { STAC_D965_REF, STAC_D965_3ST, STAC_D965_5ST, + STAC_D965_5ST_NO_FP, STAC_DELL_3ST, STAC_DELL_BIOS, STAC_927X_MODELS @@ -2154,6 +2155,13 @@ static unsigned int d965_5st_pin_configs[14] = { 0x40000100, 0x40000100 }; +static unsigned int d965_5st_no_fp_pin_configs[14] = { + 0x40000100, 0x40000100, 0x0181304e, 0x01014010, + 0x01a19040, 0x01011012, 0x01016011, 0x40000100, + 0x40000100, 0x40000100, 0x40000100, 0x01442070, + 0x40000100, 0x40000100 +}; + static unsigned int dell_3st_pin_configs[14] = { 0x02211230, 0x02a11220, 0x01a19040, 0x01114210, 0x01111212, 0x01116211, 0x01813050, 0x01112214, @@ -2166,6 +2174,7 @@ static unsigned int *stac927x_brd_tbl[STAC_927X_MODELS] = { [STAC_D965_REF] = ref927x_pin_configs, [STAC_D965_3ST] = d965_3st_pin_configs, [STAC_D965_5ST] = d965_5st_pin_configs, + [STAC_D965_5ST_NO_FP] = d965_5st_no_fp_pin_configs, [STAC_DELL_3ST] = dell_3st_pin_configs, [STAC_DELL_BIOS] = NULL, }; @@ -2176,6 +2185,7 @@ static const char *stac927x_models[STAC_927X_MODELS] = { [STAC_D965_REF] = "ref", [STAC_D965_3ST] = "3stack", [STAC_D965_5ST] = "5stack", + [STAC_D965_5ST_NO_FP] = "5stack-no-fp", [STAC_DELL_3ST] = "dell-3stack", [STAC_DELL_BIOS] = "dell-bios", }; diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 823296d7d578..a6b88482637b 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -3347,7 +3347,7 @@ static int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_MIDI_YAMAHA] = snd_usb_create_midi_interface, [QUIRK_MIDI_MIDIMAN] = snd_usb_create_midi_interface, [QUIRK_MIDI_NOVATION] = snd_usb_create_midi_interface, - [QUIRK_MIDI_RAW] = snd_usb_create_midi_interface, + [QUIRK_MIDI_FASTLANE] = snd_usb_create_midi_interface, [QUIRK_MIDI_EMAGIC] = snd_usb_create_midi_interface, [QUIRK_MIDI_CME] = snd_usb_create_midi_interface, [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk, diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 36e4f7a29adc..8e7f78941ba6 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -153,7 +153,7 @@ enum quirk_type { QUIRK_MIDI_YAMAHA, QUIRK_MIDI_MIDIMAN, QUIRK_MIDI_NOVATION, - QUIRK_MIDI_RAW, + QUIRK_MIDI_FASTLANE, QUIRK_MIDI_EMAGIC, QUIRK_MIDI_CME, QUIRK_MIDI_US122L, diff --git a/sound/usb/usbmidi.c b/sound/usb/usbmidi.c index 26bad373fe65..2fb35cc22a30 100644 --- a/sound/usb/usbmidi.c +++ b/sound/usb/usbmidi.c @@ -1778,8 +1778,18 @@ int snd_usb_create_midi_interface(struct snd_usb_audio* chip, umidi->usb_protocol_ops = &snd_usbmidi_novation_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; - case QUIRK_MIDI_RAW: + case QUIRK_MIDI_FASTLANE: umidi->usb_protocol_ops = &snd_usbmidi_raw_ops; + /* + * Interface 1 contains isochronous endpoints, but with the same + * numbers as in interface 0. Since it is interface 1 that the + * USB core has most recently seen, these descriptors are now + * associated with the endpoint numbers. This will foul up our + * attempts to submit bulk/interrupt URBs to the endpoints in + * interface 0, so we have to make sure that the USB core looks + * again at interface 0 by calling usb_set_interface() on it. + */ + usb_set_interface(umidi->chip->dev, 0, 0); err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_EMAGIC: diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h index 647ef5029651..5d955aaad85f 100644 --- a/sound/usb/usbquirks.h +++ b/sound/usb/usbquirks.h @@ -1868,7 +1868,7 @@ YAMAHA_DEVICE(0x7010, "UB99"), .data = & (const struct snd_usb_audio_quirk[]) { { .ifnum = 0, - .type = QUIRK_MIDI_RAW + .type = QUIRK_MIDI_FASTLANE }, { .ifnum = 1, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1ecbe2391c8b..4d0dd390aa50 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2301,10 +2301,11 @@ int kvm_init(void *opaque, unsigned int vcpu_size, bad_pfn = page_to_pfn(bad_page); - if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; goto out_free_0; } + cpumask_clear(cpus_hardware_enabled); r = kvm_arch_hardware_setup(); if (r < 0) |