From bd4cfb594bdea00c3920b31bd12f497fc4a2e79c Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Sun, 20 Nov 2005 21:11:31 -0800 Subject: [NETFILTER]: Remove ARRAY_SIZE duplicate Signed-off-by: Nicolas Kaiser Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_sctp.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_sctp.h b/include/linux/netfilter_ipv4/ipt_sctp.h index e93a9ec99fc2..80b3dbacd193 100644 --- a/include/linux/netfilter_ipv4/ipt_sctp.h +++ b/include/linux/netfilter_ipv4/ipt_sctp.h @@ -7,8 +7,6 @@ #define IPT_SCTP_VALID_FLAGS 0x07 -#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0])) - struct ipt_sctp_flag_info { u_int8_t chunktype; @@ -59,21 +57,21 @@ struct ipt_sctp_info { #define SCTP_CHUNKMAP_RESET(chunkmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ chunkmap[i] = 0; \ } while (0) #define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ chunkmap[i] = ~0; \ } while (0) #define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ do { \ int i; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ destmap[i] = srcmap[i]; \ } while (0) @@ -81,7 +79,7 @@ struct ipt_sctp_info { ({ \ int i; \ int flag = 1; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ if (chunkmap[i]) { \ flag = 0; \ break; \ @@ -94,7 +92,7 @@ struct ipt_sctp_info { ({ \ int i; \ int flag = 1; \ - for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \ + for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ if (chunkmap[i] != ~0) { \ flag = 0; \ break; \ -- cgit v1.2.3 From b84f4cc977ec4a1260dc8d9165efc9319a93c2a2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 20 Nov 2005 21:19:21 -0800 Subject: [NET]: Use unused bit for ipvs_property field in struct sk_buff Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0a8ea8b35816..e797d9ef0e28 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -264,16 +264,14 @@ struct sk_buff { nohdr:1, nfctinfo:3; __u8 pkt_type:3, - fclone:2; + fclone:2, + ipvs_property:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER __u32 nfmark; struct nf_conntrack *nfct; -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - __u8 ipvs_property:1; -#endif #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct sk_buff *nfct_reasm; #endif -- cgit v1.2.3 From 461ddf3b90bb149b99c3f675959c1bd6b11ed936 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 20 Nov 2005 21:25:15 -0800 Subject: [NET]: kernel-doc fixes Fix kernel-doc warnings in network files. Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- Documentation/DocBook/kernel-api.tmpl | 6 ++++-- include/linux/skbuff.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 096aed62c326..767433bdbc40 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -237,8 +237,10 @@ X!Ilib/string.c <sect1><title>Driver Support</title> !Enet/core/dev.c !Enet/ethernet/eth.c -!Einclude/linux/etherdevice.h -!Enet/core/wireless.c +!Iinclude/linux/etherdevice.h +<!-- FIXME: Removed for now since no structured comments in source +X!Enet/core/wireless.c +--> </sect1> <sect1><title>Synchronous PPP</title> !Edrivers/net/wan/syncppp.c diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e797d9ef0e28..8c5d6001a923 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -206,6 +206,7 @@ enum { * @nfct: Associated connection, if any * @ipvs_property: skbuff is owned by ipvs * @nfctinfo: Relationship of this skb to the connection + * @nfct_reasm: netfilter conntrack re-assembly pointer * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index * @tc_verd: traffic control verdict -- cgit v1.2.3 From c243f1f1f6545985afcc6adf1fc085729029c3ee Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 21 Nov 2005 06:53:16 -0800 Subject: [AGPGART] Support VIA P4M800CE bridge. Signed-off-by: Dave Jones --- drivers/char/agp/via-agp.c | 6 ++++++ include/linux/pci_ids.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c index c847df575cf5..97b0a890ba7f 100644 --- a/drivers/char/agp/via-agp.c +++ b/drivers/char/agp/via-agp.c @@ -371,6 +371,11 @@ static struct agp_device_ids via_agp_device_ids[] __devinitdata = .device_id = PCI_DEVICE_ID_VIA_3296_0, .chipset_name = "P4M800", }, + /* P4M800CE */ + { + .device_id = PCI_DEVICE_ID_VIA_P4M800CE, + .chipset_name = "P4M800CE", + }, { }, /* dummy final entry, always present */ }; @@ -511,6 +516,7 @@ static struct pci_device_id agp_via_pci_table[] = { ID(PCI_DEVICE_ID_VIA_3269_0), ID(PCI_DEVICE_ID_VIA_83_87XX_1), ID(PCI_DEVICE_ID_VIA_3296_0), + ID(PCI_DEVICE_ID_VIA_P4M800CE), { } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d4c1c8fd2925..c99a83f88dc9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1198,6 +1198,7 @@ #define PCI_DEVICE_ID_VIA_3269_0 0x0269 #define PCI_DEVICE_ID_VIA_K8T800PRO_0 0x0282 #define PCI_DEVICE_ID_VIA_8363_0 0x0305 +#define PCI_DEVICE_ID_VIA_P4M800CE 0x0314 #define PCI_DEVICE_ID_VIA_8371_0 0x0391 #define PCI_DEVICE_ID_VIA_8501_0 0x0501 #define PCI_DEVICE_ID_VIA_82C561 0x0561 -- cgit v1.2.3 From 664beed0190fae687ac51295694004902ddeb18e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Nov 2005 21:32:14 -0800 Subject: [PATCH] unpaged: unifdefed PageCompound It looks like snd_xxx is not the only nopage to be using PageReserved as a way of holding a high-order page together: which no longer works, but is masked by our failure to free from VM_RESERVED areas. We cannot fix that bug without first substituting another way to hold the high-order page together, while farming out the 0-order pages from within it. That's just what PageCompound is designed for, but it's been kept under CONFIG_HUGETLB_PAGE. Remove the #ifdefs: which saves some space (out-of-line put_page), doesn't slow down what most needs to be fast (already using hugetlb), and unifies the way we handle high-order pages.
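For driver authors, the upshot is that a high-order buffer which a nopage handler farms out page by page can simply be allocated as a compound page in the first place. A minimal sketch, assuming the 2.6.15-era alloc_pages() interface and its __GFP_COMP flag (the helper name is illustrative, not part of this patch):

#include <linux/gfp.h>
#include <linux/mm.h>

/*
 * Illustrative only: allocate a 2^order-page buffer whose pieces a
 * nopage handler can hand out one page at a time.  __GFP_COMP links
 * the 0-order pages into one compound page, so get_page()/put_page()
 * on any of them pin the head page and the unit cannot be freed
 * piecemeal -- the job PageReserved used to do, unreliably.
 */
static struct page *alloc_nopage_buffer(unsigned int order)
{
	return alloc_pages(GFP_KERNEL | __GFP_COMP, order);
}

With the #ifdefs gone this behaves the same whether or not hugetlb is configured, which is exactly the unification the patch is after.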
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 19 ------------------- include/linux/page-flags.h | 4 ---- mm/page_alloc.c | 5 ----- mm/swap.c | 3 --- 4 files changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0986d19be0b7..9701210c6680 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -311,8 +311,6 @@ struct page { extern void FASTCALL(__page_cache_release(struct page *)); -#ifdef CONFIG_HUGETLB_PAGE - static inline int page_count(struct page *page) { if (PageCompound(page)) @@ -329,23 +327,6 @@ static inline void get_page(struct page *page) void put_page(struct page *page); -#else /* CONFIG_HUGETLB_PAGE */ - -#define page_count(p) (atomic_read(&(p)->_count) + 1) - -static inline void get_page(struct page *page) -{ - atomic_inc(&page->_count); -} - -static inline void put_page(struct page *page) -{ - if (put_page_testzero(page)) - __page_cache_release(page); -} - -#endif /* CONFIG_HUGETLB_PAGE */ - /* * Multiple processes may "see" the same page. E.g. for untouched * mappings of /dev/null, all processes see the same page full of diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34767c5fc79..343083fec258 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -287,11 +287,7 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta); #define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) #define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) -#ifdef CONFIG_HUGETLB_PAGE #define PageCompound(page) test_bit(PG_compound, &(page)->flags) -#else -#define PageCompound(page) 0 -#endif #define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) #define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bd4de592dc23..23b84c4e1a57 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -148,10 +148,6 @@ static void bad_page(const char *function, struct page *page) add_taint(TAINT_BAD_PAGE); } -#ifndef CONFIG_HUGETLB_PAGE -#define prep_compound_page(page, order) do { } while (0) -#define destroy_compound_page(page, order) do { } while (0) -#else /* * Higher-order pages are called "compound pages". They are structured thusly: * @@ -205,7 +201,6 @@ static void destroy_compound_page(struct page *page, unsigned long order) ClearPageCompound(p); } } -#endif /* CONFIG_HUGETLB_PAGE */ /* * function for dealing with page's order in buddy system. diff --git a/mm/swap.c b/mm/swap.c index d09cf7f03e76..73d351439ef6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -34,8 +34,6 @@ /* How many pages do we try to swap or page in/out together? */ int page_cluster; -#ifdef CONFIG_HUGETLB_PAGE - void put_page(struct page *page) { if (unlikely(PageCompound(page))) { @@ -52,7 +50,6 @@ void put_page(struct page *page) __page_cache_release(page); } EXPORT_SYMBOL(put_page); -#endif /* * Writeback is about to end against a page which has been marked for immediate -- cgit v1.2.3 From 0b14c179a483e71ea41df2aa4a661760063115bd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Nov 2005 21:32:15 -0800 Subject: [PATCH] unpaged: VM_UNPAGED Although we tend to associate VM_RESERVED with remap_pfn_range, quite a few drivers set VM_RESERVED on areas which are then populated by nopage. 
The PageReserved removal in 2.6.15-rc1 changed VM_RESERVED not to free pages in zap_pte_range, without changing those drivers not to set it: so their pages just leak away. Let's not change miscellaneous drivers now: introduce VM_UNPAGED at the core, to flag the special areas where the ptes may have no struct page, or if they have then it's not to be touched. Replace most instances of VM_RESERVED in core mm by VM_UNPAGED. Force it on in remap_pfn_range, and the sparc and sparc64 io_remap_pfn_range. Revert addition of VM_RESERVED to powerpc vdso, it's not needed there. Is it needed anywhere? It still governs the mm->reserved_vm statistic, and special vmas not to be merged, and areas not to be core dumped; but could probably be eliminated later (the drivers are probably specifying it because in 2.4 it kept swapout off the vma, but in 2.6 we work from the LRU, which these pages don't get on). Use the VM_SHM slot for VM_UNPAGED, and define VM_SHM to 0: it serves no purpose whatsoever, and should be removed from drivers when we clean up. Signed-off-by: Hugh Dickins Acked-by: William Irwin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vdso.c | 3 +-- arch/sparc/mm/generic.c | 2 +- arch/sparc64/mm/generic.c | 2 +- include/linux/mm.h | 5 +++-- mm/fremap.c | 4 ++-- mm/madvise.c | 2 +- mm/memory.c | 30 ++++++++++++++++++------------ mm/mempolicy.c | 2 +- mm/msync.c | 4 ++-- 9 files changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 0d4d8bec0df4..b44b36e0c293 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -285,8 +285,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, * It's fine to use that for setting breakpoints in the vDSO code * pages though */ - vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | - VM_MAYEXEC | VM_RESERVED; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; vma->vm_flags |= mm->def_flags; vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; vma->vm_ops = &vdso_vmops; diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c index 9604893ffdbd..0410bae681f8 100644 --- a/arch/sparc/mm/generic.c +++ b/arch/sparc/mm/generic.c @@ -74,7 +74,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; /* See comment in mm/memory.c remap_pfn_range */ - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED; prot = __pgprot(pg_iobits); offset -= from; diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index 112c316e7cd2..8fd4cb1f050a 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -128,7 +128,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; /* See comment in mm/memory.c remap_pfn_range */ - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED; prot = __pgprot(pg_iobits); offset -= from; diff --git a/include/linux/mm.h b/include/linux/mm.h index 9701210c6680..f0cdfd18db55 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -144,7 +144,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ #define VM_GROWSUP 0x00000200 -#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ +#define VM_SHM 0x00000000 /* Means nothing: delete it later */ 
+#define VM_UNPAGED 0x00000400 /* Pages managed without map count */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ #define VM_EXECUTABLE 0x00001000 @@ -157,7 +158,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ -#define VM_RESERVED 0x00080000 /* Pages managed in a special way */ +#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ diff --git a/mm/fremap.c b/mm/fremap.c index d862be3bc3e3..94254c5d7a18 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -65,7 +65,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t pte_val; spinlock_t *ptl; - BUG_ON(vma->vm_flags & VM_RESERVED); + BUG_ON(vma->vm_flags & VM_UNPAGED); pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); @@ -122,7 +122,7 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, pte_t pte_val; spinlock_t *ptl; - BUG_ON(vma->vm_flags & VM_RESERVED); + BUG_ON(vma->vm_flags & VM_UNPAGED); pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); diff --git a/mm/madvise.c b/mm/madvise.c index 17aaf3e16449..328a3bcce527 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma, unsigned long start, unsigned long end) { *prev = vma; - if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED)) + if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED)) return -EINVAL; if (unlikely(vma->vm_flags & VM_NONLINEAR)) { diff --git a/mm/memory.c b/mm/memory.c index cfce5f1f30f2..ece04963158e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) /* * This function is called to print an error when a pte in a - * !VM_RESERVED region is found pointing to an invalid pfn (which + * !VM_UNPAGED region is found pointing to an invalid pfn (which * is an error. * * The calling function must still handle the error. @@ -381,15 +381,15 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_set_pte; } - /* If the region is VM_RESERVED, the mapping is not + /* If the region is VM_UNPAGED, the mapping is not * mapped via rmap - duplicate the pte as is. */ - if (vm_flags & VM_RESERVED) + if (vm_flags & VM_UNPAGED) goto out_set_pte; pfn = pte_pfn(pte); /* If the pte points outside of valid memory but - * the region is not VM_RESERVED, we have a problem. + * the region is not VM_UNPAGED, we have a problem. */ if (unlikely(!pfn_valid(pfn))) { print_bad_pte(vma, pte, addr); @@ -528,7 +528,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. 
*/ - if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { + if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) { if (!vma->anon_vma) return 0; } @@ -572,7 +572,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, (*zap_work) -= PAGE_SIZE; - if (!(vma->vm_flags & VM_RESERVED)) { + if (!(vma->vm_flags & VM_UNPAGED)) { unsigned long pfn = pte_pfn(ptent); if (unlikely(!pfn_valid(pfn))) print_bad_pte(vma, ptent, addr); @@ -1191,10 +1191,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * rest of the world about it: * VM_IO tells people not to look at these pages * (accesses can have side effects). - * VM_RESERVED tells the core MM not to "manage" these pages - * (e.g. refcount, mapcount, try to swap them out). + * VM_RESERVED is specified all over the place, because + * in 2.4 it kept swapout's vma scan off this vma; but + * in 2.6 the LRU scan won't even find its pages, so this + * flag means no more than count its pages in reserved_vm, + * and omit it from core dump, even when VM_IO turned off. + * VM_UNPAGED tells the core MM not to "manage" these pages + * (e.g. refcount, mapcount, try to swap them out): in + * particular, zap_pte_range does not try to free them. */ - vma->vm_flags |= VM_IO | VM_RESERVED; + vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; @@ -1276,7 +1282,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t entry; int ret = VM_FAULT_MINOR; - BUG_ON(vma->vm_flags & VM_RESERVED); + BUG_ON(vma->vm_flags & VM_UNPAGED); if (unlikely(!pfn_valid(pfn))) { /* @@ -1924,7 +1930,7 @@ retry: inc_mm_counter(mm, anon_rss); lru_cache_add_active(new_page); page_add_anon_rmap(new_page, vma, address); - } else if (!(vma->vm_flags & VM_RESERVED)) { + } else if (!(vma->vm_flags & VM_UNPAGED)) { inc_mm_counter(mm, file_rss); page_add_file_rmap(new_page); } @@ -2203,7 +2209,7 @@ static int __init gate_vma_init(void) gate_vma.vm_start = FIXADDR_USER_START; gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_page_prot = PAGE_READONLY; - gate_vma.vm_flags = VM_RESERVED; + gate_vma.vm_flags = 0; return 0; } __initcall(gate_vma_init); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 5abc57c2b8bd..5609a31bdf22 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -269,7 +269,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, first = find_vma(mm, start); if (!first) return ERR_PTR(-EFAULT); - if (first->vm_flags & VM_RESERVED) + if (first->vm_flags & VM_UNPAGED) return ERR_PTR(-EACCES); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { diff --git a/mm/msync.c b/mm/msync.c index 0e040e9c39d8..b3f4caf3010b 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -97,9 +97,9 @@ static void msync_page_range(struct vm_area_struct *vma, /* For hugepages we can't go walking the page table normally, * but that's ok, hugetlbfs is memory based, so we don't need * to do anything more on an msync(). - * Can't do anything with VM_RESERVED regions either. + * Can't do anything with VM_UNPAGED regions either. */ - if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED)) + if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED)) return; BUG_ON(addr >= end); -- cgit v1.2.3 From af2b4079ab154bd12e8c12b02db5f31b31babe63 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 22 Nov 2005 14:38:04 -0800 Subject: [NET]: Shut up warnings in net/core/flow.c Not really a network problem, more a !SMP issue. 
net/core/flow.c:295: warning: statement with no effect flow.c:295: smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0); Fix this by converting the macro to an inline function, which also improves typechecking for !SMP builds. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/smp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 9dfa3ee769ae..b6069c8e1f04 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -94,7 +94,13 @@ void smp_prepare_boot_cpu(void); */ #define raw_smp_processor_id() 0 #define hard_smp_processor_id() 0 -#define smp_call_function(func,info,retry,wait) ({ 0; }) + +static inline int smp_call_function(void (*func) (void *info), void *info, + int retry, int wait) +{ + return 0; +} + #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 -- cgit v1.2.3 From ac3461ad632e86e7debd871776683c05ef3ba4c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 22 Nov 2005 19:39:30 -0800 Subject: Fix up GFP_ZONEMASK for GFP_DMA32 usage There was some confusion about the different zone usage; this should fix up the resulting mess in the GFP zonemask handling. The different zone usage is still confusing (it's very easy to mix up the individual zone numbers with the GFP zone _list_ numbers), so we might want to clean up some of this in the future, but in the meantime this should fix the actual problems. Acked-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 +++++++-- include/linux/mmzone.h | 18 ++++-------------- 2 files changed, 11 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 313dfe9b443a..8b2eab90abb6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -11,7 +11,7 @@ struct vm_area_struct; /* * GFP bitmasks.. */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) #ifdef CONFIG_DMA_IS_DMA32 @@ -74,7 +74,12 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 -#define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK)) +static inline int gfp_zone(gfp_t gfp) +{ + int zone = GFP_ZONEMASK & (__force int) gfp; + BUG_ON(zone >= GFP_ZONETYPES); + return zone; +} /* * There is only one page-allocator function, and two main namespaces to diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2c8edad5dccf..9f22090df7dd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -91,21 +91,11 @@ struct per_cpu_pageset { * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible * combinations of zone modifiers in "zone modifier space". + * + * NOTE! Make sure this matches the zones in <linux/gfp.h> */ -#define GFP_ZONEMASK 0x03 -/* - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists.
- If the left most zone modifier is a "loner" then the highest valid - zonelist would be four allowing us to allocate only five zonelists. - Use the first form when the left most bit is not a "loner", otherwise - use the second. - */ -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ +#define GFP_ZONEMASK 0x07 +#define GFP_ZONETYPES 5 /* * On machines where it is needed (eg PCs) we divide physical memory -- cgit v1.2.3 From 1778d55edb62753a92b979fa57072c2e1ff3d062 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 22 Nov 2005 21:58:37 -0800 Subject: compat-ioctl.c: fix compile with no CONFIG_JBD The ext3 compat-ioctl translation wants to translate data structures that are only declared when CONFIG_JBD is enabled. So make <linux/jbd.h> play nicely even when we don't actually end up using it. Acked-by: Andrew Morton Acked-by: Jeffrey Hundstad Acked-by: Zan Lynx Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index aa56172c6fed..dcde7adfdce5 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -16,8 +16,6 @@ #ifndef _LINUX_JBD_H #define _LINUX_JBD_H -#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || !defined(__KERNEL__) - /* Allow this file to be included directly into e2fsprogs */ #ifndef __KERNEL__ #include "jfs_compat.h" @@ -1083,19 +1081,4 @@ extern int jbd_blocks_per_page(struct inode *inode); #endif /* __KERNEL__ */ -#endif /* CONFIG_JBD || CONFIG_JBD_MODULE || !__KERNEL__ */ - -/* - * Compatibility no-ops which allow the kernel to compile without CONFIG_JBD - * go here. - */ - -#if defined(__KERNEL__) && !(defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE)) - -#define J_ASSERT(expr) do {} while (0) -#define J_ASSERT_BH(bh, expr) do {} while (0) -#define buffer_jbd(bh) 0 -#define journal_buffer_journal_lru(bh) 0 - -#endif /* defined(__KERNEL__) && !defined(CONFIG_JBD) */ #endif /* _LINUX_JBD_H */ -- cgit v1.2.3 From 2d0ebb36038c0626cde662a3b06da9787cfb68c3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Nov 2005 08:44:05 -0800 Subject: Revert "[NET]: Shut up warnings in net/core/flow.c" This reverts commit af2b4079ab154bd12e8c12b02db5f31b31babe63 Changing the #define to an inline function breaks on non-SMP builds, since quite a few places in the kernel do not implement the ipi handler when compiling for UP. Signed-off-by: Linus Torvalds --- include/linux/smp.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index b6069c8e1f04..9dfa3ee769ae 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -94,13 +94,7 @@ void smp_prepare_boot_cpu(void); */ #define raw_smp_processor_id() 0 #define hard_smp_processor_id() 0 - -static inline int smp_call_function(void (*func) (void *info), void *info, - int retry, int wait) -{ - return 0; -} - +#define smp_call_function(func,info,retry,wait) ({ 0; }) #define on_each_cpu(func,info,retry,wait) ({ func(info); 0; }) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 -- cgit v1.2.3 From 8dd396ec7bf706fe85d8c6792b478ee6f09e8de6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2005 15:45:53 -0800 Subject: [PATCH] USB: kernel-doc for linux/usb.h Fix kernel-doc warning in linux/usb.h.
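The warning here comes from scripts/kernel-doc, which expects every struct member to be described by an @name: line in the comment block above the struct. Roughly the expected shape (an illustrative struct, not the actual usb.h hunk below):

/**
 * struct example_endpoint - shape of a kernel-doc comment
 * @urb_list: urbs queued to this endpoint
 * @kobj: kobject for sysfs info; leaving a member like this one
 *        undocumented is what triggers the kernel-doc warning
 */
struct example_endpoint {
	struct list_head urb_list;
	struct kobject kobj;
};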
Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 856d232c7562..d81b050e5955 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -47,6 +47,7 @@ struct usb_driver; * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb + * @kobj: kobject for sysfs info * @extra: descriptors following this endpoint in the configuration * @extralen: how many bytes of "extra" are valid * -- cgit v1.2.3 From f5417612d787e6b619fd69616bbf95f1b895e900 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 28 Nov 2005 18:09:44 +0000 Subject: [ARM] 3181/1: add PORT_ identifier for Hilscher netx uart Patch from Sascha Hauer This patch adds PORT_NETX for supporting the Hilscher netx embedded UARTs. Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index a3ac92b19aca..e3710d7e260a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -121,6 +121,9 @@ #define PORT_IP3106 70 +/* Hilscher netx */ +#define PORT_NETX 71 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From 24117defabc849a6ad5081ad0fafd0664bf55f13 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 28 Nov 2005 21:00:29 +0000 Subject: [MMC] Fix protocol errors A review against MMC/SD specifications found some errors in the current implementation. Signed-off-by: Pierre Ossman Signed-off-by: Russell King --- drivers/mmc/mmc.c | 2 +- include/linux/mmc/protocol.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index da528390acf8..d336a1d65dc7 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -816,7 +816,7 @@ static void mmc_discover_cards(struct mmc_host *host) cmd.opcode = SD_SEND_RELATIVE_ADDR; cmd.arg = 0; - cmd.flags = MMC_RSP_R1; + cmd.flags = MMC_RSP_R6; err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); if (err != MMC_ERR_NONE) diff --git a/include/linux/mmc/protocol.h b/include/linux/mmc/protocol.h index f819cae92266..a14dc306545b 100644 --- a/include/linux/mmc/protocol.h +++ b/include/linux/mmc/protocol.h @@ -63,7 +63,7 @@ /* class 5 */ #define MMC_ERASE_GROUP_START 35 /* ac [31:0] data addr R1 */ #define MMC_ERASE_GROUP_END 36 /* ac [31:0] data addr R1 */ -#define MMC_ERASE 37 /* ac R1b */ +#define MMC_ERASE 38 /* ac R1b */ /* class 9 */ #define MMC_FAST_IO 39 /* ac R4 */ @@ -74,7 +74,7 @@ /* class 8 */ #define MMC_APP_CMD 55 /* ac [31:16] RCA R1 */ -#define MMC_GEN_CMD 56 /* adtc [0] RD/WR R1b */ +#define MMC_GEN_CMD 56 /* adtc [0] RD/WR R1 */ /* SD commands type argument response */ /* class 8 */ -- cgit v1.2.3 From 6aab341e0a28aff100a09831c5300a2994b8b986 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Nov 2005 14:34:23 -0800 Subject: mm: re-architect the VM_UNPAGED logic This replaces the (in my opinion horrible) VM_UNMAPPED logic with very explicit support for a "remapped page range" aka VM_PFNMAP. It allows a VM area to contain an arbitrary range of page table entries that the VM never touches, and never considers to be normal pages. 
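The test for "not a normal page" is purely arithmetic: in a linear VM_PFNMAP mapping, the pte for an address must hold exactly the frame number remap_pfn_range() put there, and anything else must be a COWed copy with a real struct page. A hedged restatement of that rule (the helper name is made up; the real check is vm_normal_page() in the diff below):

/*
 * For a VM_PFNMAP vma, vm_pgoff records the first mapped pfn, so the
 * pfn expected at addr follows from the offset into the vma.  A pte
 * holding this pfn is a raw mapping with no struct page behind it;
 * any other pfn there means the entry was COWed and is a normal page.
 */
static inline unsigned long pfnmap_expected_pfn(struct vm_area_struct *vma,
						unsigned long addr)
{
	return vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
}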
Any user of "remap_pfn_range()" automatically gets this new functionality, and doesn't even have to mark the pages reserved or indeed mark them any other way. It just works. As a side effect, doing mmap() on /dev/mem works for arbitrary ranges. Sparc update from David in the next commit. Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vdso.c | 6 +- drivers/char/mem.c | 2 +- fs/proc/task_mmu.c | 7 +- include/linux/mm.h | 5 +- mm/fremap.c | 22 ++---- mm/madvise.c | 2 +- mm/memory.c | 189 ++++++++++++++++++++++++--------------------- mm/mempolicy.c | 12 +-- mm/msync.c | 12 +-- mm/nommu.c | 2 +- mm/rmap.c | 14 +--- 11 files changed, 127 insertions(+), 146 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b44b36e0c293..f0c47dab0903 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -145,8 +145,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); struct page *upg = (vma && vma->vm_mm) ? - follow_page(vma->vm_mm, vma->vm_start + - i*PAGE_SIZE, 0) + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) : NULL; dump_one_vdso_page(pg, upg); } @@ -157,8 +156,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); struct page *upg = (vma && vma->vm_mm) ? - follow_page(vma->vm_mm, vma->vm_start + - i*PAGE_SIZE, 0) + follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) : NULL; dump_one_vdso_page(pg, upg); } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 29c3b631445a..91dd669273e0 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -591,7 +591,7 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size) if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0) goto out_up; - if (vma->vm_flags & (VM_SHARED | VM_HUGETLB | VM_UNPAGED)) + if (vma->vm_flags & (VM_SHARED | VM_HUGETLB)) break; count = vma->vm_end - addr; if (count > size) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 9ab97cef0daa..50bd5a8f0446 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -402,12 +402,11 @@ struct numa_maps { /* * Calculate numa node maps for a vma */ -static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) +static struct numa_maps *get_numa_maps(struct vm_area_struct *vma) { + int i; struct page *page; unsigned long vaddr; - struct mm_struct *mm = vma->vm_mm; - int i; struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); if (!md) @@ -420,7 +419,7 @@ static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) md->node[i] =0; for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { - page = follow_page(mm, vaddr, 0); + page = follow_page(vma, vaddr, 0); if (page) { int count = page_mapcount(page); diff --git a/include/linux/mm.h b/include/linux/mm.h index f0cdfd18db55..6a75a7a78bf1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,7 +145,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ #define VM_GROWSUP 0x00000200 #define VM_SHM 0x00000000 /* Means nothing: delete it later */ -#define VM_UNPAGED 0x00000400 /* Pages managed without map count */ +#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. 
*/ #define VM_EXECUTABLE 0x00001000 @@ -664,6 +664,7 @@ struct zap_details { unsigned long truncate_count; /* Compare vm_truncate_count */ }; +struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t); unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *); unsigned long unmap_vmas(struct mmu_gather **tlb, @@ -953,7 +954,7 @@ unsigned long vmalloc_to_pfn(void *addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); -struct page *follow_page(struct mm_struct *, unsigned long address, +struct page *follow_page(struct vm_area_struct *, unsigned long address, unsigned int foll_flags); #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ diff --git a/mm/fremap.c b/mm/fremap.c index 007cbad9331e..f851775e09c2 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -27,24 +27,20 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page = NULL; if (pte_present(pte)) { - unsigned long pfn = pte_pfn(pte); - flush_cache_page(vma, addr, pfn); + flush_cache_page(vma, addr, pte_pfn(pte)); pte = ptep_clear_flush(vma, addr, ptep); - if (unlikely(!pfn_valid(pfn))) { - print_bad_pte(vma, pte, addr); - goto out; + page = vm_normal_page(vma, addr, pte); + if (page) { + if (pte_dirty(pte)) + set_page_dirty(page); + page_remove_rmap(page); + page_cache_release(page); } - page = pfn_to_page(pfn); - if (pte_dirty(pte)) - set_page_dirty(page); - page_remove_rmap(page); - page_cache_release(page); } else { if (!pte_file(pte)) free_swap_and_cache(pte_to_swp_entry(pte)); pte_clear(mm, addr, ptep); } -out: return !!page; } @@ -65,8 +61,6 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_t pte_val; spinlock_t *ptl; - BUG_ON(vma->vm_flags & VM_UNPAGED); - pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); if (!pud) @@ -122,8 +116,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, pte_t pte_val; spinlock_t *ptl; - BUG_ON(vma->vm_flags & VM_UNPAGED); - pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); if (!pud) diff --git a/mm/madvise.c b/mm/madvise.c index 328a3bcce527..2b7cf0400a21 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma, unsigned long start, unsigned long end) { *prev = vma; - if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED)) + if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) return -EINVAL; if (unlikely(vma->vm_flags & VM_NONLINEAR)) { diff --git a/mm/memory.c b/mm/memory.c index d1f46f4e4c8a..b57fbc636058 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -333,9 +333,9 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) } /* - * This function is called to print an error when a pte in a - * !VM_UNPAGED region is found pointing to an invalid pfn (which - * is an error. + * This function is called to print an error when a bad pte + * is found. For example, we might have a PFN-mapped pte in + * a region that doesn't allow it. * * The calling function must still handle the error. */ @@ -350,19 +350,56 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr) } /* - * page_is_anon applies strict checks for an anonymous page belonging to - * this vma at this address. 
It is used on VM_UNPAGED vmas, which are - * usually populated with shared originals (which must not be counted), - * but occasionally contain private COWed copies (when !VM_SHARED, or - * perhaps via ptrace when VM_SHARED). An mmap of /dev/mem might window - * free pages, pages from other processes, or from other parts of this: - * it's tricky, but try not to be deceived by foreign anonymous pages. + * This function gets the "struct page" associated with a pte. + * + * NOTE! Some mappings do not have "struct pages". A raw PFN mapping + * will have each page table entry just pointing to a raw page frame + * number, and as far as the VM layer is concerned, those do not have + * pages associated with them - even if the PFN might point to memory + * that otherwise is perfectly fine and has a "struct page". + * + * The way we recognize those mappings is through the rules set up + * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set, + * and the vm_pgoff will point to the first PFN mapped: thus every + * page that is a raw mapping will always honor the rule + * + * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT) + * + * and if that isn't true, the page has been COW'ed (in which case it + * _does_ have a "struct page" associated with it even if it is in a + * VM_PFNMAP range). */ -static inline int page_is_anon(struct page *page, - struct vm_area_struct *vma, unsigned long addr) +struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - return page && PageAnon(page) && page_mapped(page) && - page_address_in_vma(page, vma) == addr; + unsigned long pfn = pte_pfn(pte); + + if (vma->vm_flags & VM_PFNMAP) { + unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT; + if (pfn == vma->vm_pgoff + off) + return NULL; + } + + /* + * Add some anal sanity checks for now. Eventually, + * we should just do "return pfn_to_page(pfn)", but + * in the meantime we check that we get a valid pfn, + * and that the resulting page looks ok. + * + * Remove this test eventually! + */ + if (unlikely(!pfn_valid(pfn))) { + print_bad_pte(vma, pte, addr); + return NULL; + } + + /* + * NOTE! We still have PageReserved() pages in the page + * tables. + * + * The PAGE_ZERO() pages and various VDSO mappings can + * cause them to exist. + */ + return pfn_to_page(pfn); } /* @@ -379,7 +416,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long vm_flags = vma->vm_flags; pte_t pte = *src_pte; struct page *page; - unsigned long pfn; /* pte contains position in swap or file, so copy. */ if (unlikely(!pte_present(pte))) { @@ -397,22 +433,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_set_pte; } - pfn = pte_pfn(pte); - page = pfn_valid(pfn)? pfn_to_page(pfn): NULL; - - if (unlikely(vm_flags & VM_UNPAGED)) - if (!page_is_anon(page, vma, addr)) - goto out_set_pte; - - /* - * If the pte points outside of valid memory but - * the region is not VM_UNPAGED, we have a problem. 
- */ - if (unlikely(!page)) { - print_bad_pte(vma, pte, addr); - goto out_set_pte; /* try to do something sane */ - } - /* * If it's a COW mapping, write protect it both * in the parent and the child @@ -429,9 +449,13 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (vm_flags & VM_SHARED) pte = pte_mkclean(pte); pte = pte_mkold(pte); - get_page(page); - page_dup_rmap(page); - rss[!!PageAnon(page)]++; + + page = vm_normal_page(vma, addr, pte); + if (page) { + get_page(page); + page_dup_rmap(page); + rss[!!PageAnon(page)]++; + } out_set_pte: set_pte_at(dst_mm, addr, dst_pte, pte); @@ -543,7 +567,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, * readonly mappings. The tradeoff is that copy_page_range is more * efficient than faulting. */ - if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) { + if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) { if (!vma->anon_vma) return 0; } @@ -584,19 +608,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, } if (pte_present(ptent)) { struct page *page; - unsigned long pfn; (*zap_work) -= PAGE_SIZE; - pfn = pte_pfn(ptent); - page = pfn_valid(pfn)? pfn_to_page(pfn): NULL; - - if (unlikely(vma->vm_flags & VM_UNPAGED)) { - if (!page_is_anon(page, vma, addr)) - page = NULL; - } else if (unlikely(!page)) - print_bad_pte(vma, ptent, addr); - + page = vm_normal_page(vma, addr, ptent); if (unlikely(details) && page) { /* * unmap_shared_mapping_pages() wants to @@ -852,7 +867,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, /* * Do a quick page-table lookup for a single page. */ -struct page *follow_page(struct mm_struct *mm, unsigned long address, +struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { pgd_t *pgd; @@ -860,8 +875,8 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address, pmd_t *pmd; pte_t *ptep, pte; spinlock_t *ptl; - unsigned long pfn; struct page *page; + struct mm_struct *mm = vma->vm_mm; page = follow_huge_addr(mm, address, flags & FOLL_WRITE); if (!IS_ERR(page)) { @@ -897,11 +912,10 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address, goto unlock; if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; - pfn = pte_pfn(pte); - if (!pfn_valid(pfn)) + page = vm_normal_page(vma, address, pte); + if (unlikely(!page)) goto unlock; - page = pfn_to_page(pfn); if (flags & FOLL_GET) get_page(page); if (flags & FOLL_TOUCH) { @@ -974,8 +988,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, return i ? : -EFAULT; } if (pages) { - pages[i] = pte_page(*pte); - get_page(pages[i]); + struct page *page = vm_normal_page(vma, start, *pte); + pages[i] = page; + if (page) + get_page(page); } pte_unmap(pte); if (vmas) @@ -1010,7 +1026,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, foll_flags |= FOLL_WRITE; cond_resched(); - while (!(page = follow_page(mm, start, foll_flags))) { + while (!(page = follow_page(vma, start, foll_flags))) { int ret; ret = __handle_mm_fault(mm, vma, start, foll_flags & FOLL_WRITE); @@ -1214,11 +1230,12 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * in 2.6 the LRU scan won't even find its pages, so this * flag means no more than count its pages in reserved_vm, * and omit it from core dump, even when VM_IO turned off. - * VM_UNPAGED tells the core MM not to "manage" these pages - * (e.g. 
refcount, mapcount, try to swap them out): in - * particular, zap_pte_range does not try to free them. + * VM_PFNMAP tells the core MM that the base pages are just + * raw PFN mappings, and do not have a "struct page" associated + * with them. */ - vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED; + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_pgoff = pfn; BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; @@ -1273,6 +1290,26 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } +static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va) +{ + /* + * If the source page was a PFN mapping, we don't have + * a "struct page" for it. We do a best-effort copy by + * just copying from the original user address. If that + * fails, we just zero-fill it. Live with it. + */ + if (unlikely(!src)) { + void *kaddr = kmap_atomic(dst, KM_USER0); + unsigned long left = __copy_from_user_inatomic(kaddr, (void __user *)va, PAGE_SIZE); + if (left) + memset(kaddr, 0, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + return; + + } + copy_user_highpage(dst, src, va); +} + /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address @@ -1296,28 +1333,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl, pte_t orig_pte) { struct page *old_page, *src_page, *new_page; - unsigned long pfn = pte_pfn(orig_pte); pte_t entry; int ret = VM_FAULT_MINOR; - if (unlikely(!pfn_valid(pfn))) { - /* - * Page table corrupted: show pte and kill process. - * Or it's an attempt to COW an out-of-map VM_UNPAGED - * entry, which copy_user_highpage does not support. - */ - print_bad_pte(vma, orig_pte, address); - ret = VM_FAULT_OOM; - goto unlock; - } - old_page = pfn_to_page(pfn); + old_page = vm_normal_page(vma, address, orig_pte); src_page = old_page; - - if (unlikely(vma->vm_flags & VM_UNPAGED)) - if (!page_is_anon(old_page, vma, address)) { - old_page = NULL; - goto gotten; - } + if (!old_page) + goto gotten; if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { int reuse = can_share_swap_page(old_page); @@ -1351,7 +1373,7 @@ gotten: new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); if (!new_page) goto oom; - copy_user_highpage(new_page, src_page, address); + cow_user_page(new_page, src_page, address); } /* @@ -1812,16 +1834,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; pte_t entry; - /* - * A VM_UNPAGED vma will normally be filled with present ptes - * by remap_pfn_range, and never arrive here; but it might have - * holes, or if !VM_DONTEXPAND, mremap might have expanded it. - * It's weird enough handling anon pages in unpaged vmas, we do - * not want to worry about ZERO_PAGEs too (it may or may not - * matter if their counts wrap): just give them anon pages. - */ - - if (write_access || (vma->vm_flags & VM_UNPAGED)) { + if (write_access) { /* Allocate our own private page. 
*/ pte_unmap(page_table); @@ -1896,8 +1909,6 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, int anon = 0; pte_unmap(page_table); - BUG_ON(vma->vm_flags & VM_UNPAGED); - if (vma->vm_file) { mapping = vma->vm_file->f_mapping; sequence = mapping->truncate_count; @@ -1930,7 +1941,7 @@ retry: page = alloc_page_vma(GFP_HIGHUSER, vma, address); if (!page) goto oom; - copy_user_highpage(page, new_page, address); + cow_user_page(page, new_page, address); page_cache_release(new_page); new_page = page; anon = 1; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 5609a31bdf22..bec88c81244e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -189,17 +189,15 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { - unsigned long pfn; + struct page *page; unsigned int nid; if (!pte_present(*pte)) continue; - pfn = pte_pfn(*pte); - if (!pfn_valid(pfn)) { - print_bad_pte(vma, *pte, addr); + page = vm_normal_page(vma, addr, *pte); + if (!page) continue; - } - nid = pfn_to_nid(pfn); + nid = page_to_nid(page); if (!node_isset(nid, *nodes)) break; } while (pte++, addr += PAGE_SIZE, addr != end); @@ -269,8 +267,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, first = find_vma(mm, start); if (!first) return ERR_PTR(-EFAULT); - if (first->vm_flags & VM_UNPAGED) - return ERR_PTR(-EACCES); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { if (!vma->vm_next && vma->vm_end < end) diff --git a/mm/msync.c b/mm/msync.c index b3f4caf3010b..1b5b6f662dcf 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -27,7 +27,6 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, again: pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { - unsigned long pfn; struct page *page; if (progress >= 64) { @@ -40,13 +39,9 @@ again: continue; if (!pte_maybe_dirty(*pte)) continue; - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) { - print_bad_pte(vma, *pte, addr); + page = vm_normal_page(vma, addr, *pte); + if (!page) continue; - } - page = pfn_to_page(pfn); - if (ptep_clear_flush_dirty(vma, addr, pte) || page_test_and_clear_dirty(page)) set_page_dirty(page); @@ -97,9 +92,8 @@ static void msync_page_range(struct vm_area_struct *vma, /* For hugepages we can't go walking the page table normally, * but that's ok, hugetlbfs is memory based, so we don't need * to do anything more on an msync(). - * Can't do anything with VM_UNPAGED regions either. */ - if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED)) + if (vma->vm_flags & VM_HUGETLB) return; BUG_ON(addr >= end); diff --git a/mm/nommu.c b/mm/nommu.c index 6deb6ab3d6ad..c1196812876b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1045,7 +1045,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) EXPORT_SYMBOL(find_vma); -struct page *follow_page(struct mm_struct *mm, unsigned long address, +struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags) { return NULL; diff --git a/mm/rmap.c b/mm/rmap.c index 2e034a0b89ab..6389cda02a20 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -226,8 +226,6 @@ vma_address(struct page *page, struct vm_area_struct *vma) /* * At what user virtual address is page expected in vma? checking that the * page matches the vma: currently only used on anon pages, by unuse_vma; - * and by extraordinary checks on anon pages in VM_UNPAGED vmas, taking - * care that an mmap of /dev/mem might window free and foreign pages. 
*/ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { @@ -614,7 +612,6 @@ static void try_to_unmap_cluster(unsigned long cursor, struct page *page; unsigned long address; unsigned long end; - unsigned long pfn; address = (vma->vm_start + cursor) & CLUSTER_MASK; end = address + CLUSTER_SIZE; @@ -643,15 +640,8 @@ static void try_to_unmap_cluster(unsigned long cursor, for (; address < end; pte++, address += PAGE_SIZE) { if (!pte_present(*pte)) continue; - - pfn = pte_pfn(*pte); - if (unlikely(!pfn_valid(pfn))) { - print_bad_pte(vma, *pte, address); - continue; - } - - page = pfn_to_page(pfn); - BUG_ON(PageAnon(page)); + page = vm_normal_page(vma, address, *pte); + BUG_ON(!page || PageAnon(page)); if (ptep_clear_flush_young(vma, address, pte)) continue; -- cgit v1.2.3 From a9d9baa1e819b2f92f9cfa5240f766c535e636a6 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Mon, 28 Nov 2005 13:43:46 -0800 Subject: [PATCH] clean up lock_cpu_hotplug() in cpufreq There are some callers in the cpufreq hotplug notify path where the lowest-level function calls lock_cpu_hotplug(). The lock is already held during cpu_up() and cpu_down() calls when the notify calls are broadcast to registered clients. Ideally, we could preempt_disable() at the highest caller and make sure we don't sleep in the path down into cpufreq->driver_target() calls, but the calls are so intertwined that this is cumbersome to clean up. Hence we consistently use lock_cpu_hotplug() and unlock_cpu_hotplug() in all places. - Removed export of cpucontrol semaphore and made it static. - Removed explicit uses of up/down with lock_cpu_hotplug(), so we can keep track of the callers in the same thread context and just keep refcounts, without calling a down() that causes a deadlock. - Removed current_in_cpu_hotplug() uses. - Removed PF_HOTPLUG_CPU in sched.h, introduced for the current_in_cpu_hotplug() temporary workaround. Tested with insmod of cpufreq_stat.ko, and logical online/offline, to make sure we don't have any hang situations. Signed-off-by: Ashok Raj Cc: Zwane Mwaikambo Cc: Shaohua Li Cc: "Siddha, Suresh B" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/cpufreq/cpufreq.c | 12 ++----- include/linux/cpu.h | 7 ++-- include/linux/sched.h | 1 - kernel/cpu.c | 83 ++++++++++++++++++++++++++++------------------- 4 files changed, 54 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1c0f62d0f938..815902c2c856 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1113,21 +1113,13 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, { int retval = -EINVAL; - /* - * If we are already in context of hotplug thread, we dont need to - * acquire the hotplug lock. Otherwise acquire cpucontrol to prevent - * hotplug from removing this cpu that we are working on. - */ - if (!current_in_cpu_hotplug()) - lock_cpu_hotplug(); + lock_cpu_hotplug(); dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, target_freq, relation); if (cpu_online(policy->cpu) && cpufreq_driver->target) retval = cpufreq_driver->target(policy, target_freq, relation); - if (!current_in_cpu_hotplug()) - unlock_cpu_hotplug(); + unlock_cpu_hotplug(); return retval; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 43c44530ef9d..0ed1d4853c69 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -65,10 +65,9 @@ extern struct sysdev_class cpu_sysdev_class; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down.
*/ -extern struct semaphore cpucontrol; -#define lock_cpu_hotplug() down(&cpucontrol) -#define unlock_cpu_hotplug() up(&cpucontrol) -#define lock_cpu_hotplug_interruptible() down_interruptible(&cpucontrol) +extern void lock_cpu_hotplug(void); +extern void unlock_cpu_hotplug(void); +extern int lock_cpu_hotplug_interruptible(void); #define hotcpu_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2038bd27b041..b0ad6f30679e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -908,7 +908,6 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ -#define PF_HOTPLUG_CPU 0x01000000 /* Currently performing CPU hotplug */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/cpu.c b/kernel/cpu.c index d61ba88f34e5..e882c6babf41 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -16,47 +16,76 @@ #include /* This protects CPUs going up and down... */ -DECLARE_MUTEX(cpucontrol); -EXPORT_SYMBOL_GPL(cpucontrol); +static DECLARE_MUTEX(cpucontrol); static struct notifier_block *cpu_chain; -/* - * Used to check by callers if they need to acquire the cpucontrol - * or not to protect a cpu from being removed. Its sometimes required to - * call these functions both for normal operations, and in response to - * a cpu being added/removed. If the context of the call is in the same - * thread context as a CPU hotplug thread, we dont need to take the lock - * since its already protected - * check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj - */ +#ifdef CONFIG_HOTPLUG_CPU +static struct task_struct *lock_cpu_hotplug_owner; +static int lock_cpu_hotplug_depth; -int current_in_cpu_hotplug(void) +static int __lock_cpu_hotplug(int interruptible) { - return (current->flags & PF_HOTPLUG_CPU); + int ret = 0; + + if (lock_cpu_hotplug_owner != current) { + if (interruptible) + ret = down_interruptible(&cpucontrol); + else + down(&cpucontrol); + } + + /* + * Set only if we succeed in locking + */ + if (!ret) { + lock_cpu_hotplug_depth++; + lock_cpu_hotplug_owner = current; + } + + return ret; } -EXPORT_SYMBOL_GPL(current_in_cpu_hotplug); +void lock_cpu_hotplug(void) +{ + __lock_cpu_hotplug(0); +} +EXPORT_SYMBOL_GPL(lock_cpu_hotplug); +void unlock_cpu_hotplug(void) +{ + if (--lock_cpu_hotplug_depth == 0) { + lock_cpu_hotplug_owner = NULL; + up(&cpucontrol); + } +} +EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); + +int lock_cpu_hotplug_interruptible(void) +{ + return __lock_cpu_hotplug(1); +} +EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible); +#endif /* CONFIG_HOTPLUG_CPU */ /* Need to know about CPUs going up/down? 
*/ int register_cpu_notifier(struct notifier_block *nb) { int ret; if ((ret = lock_cpu_hotplug_interruptible()) != 0) return ret; ret = notifier_chain_register(&cpu_chain, nb); unlock_cpu_hotplug(); return ret; } EXPORT_SYMBOL(register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) { lock_cpu_hotplug(); notifier_chain_unregister(&cpu_chain, nb); unlock_cpu_hotplug(); } EXPORT_SYMBOL(unregister_cpu_notifier); @@ -112,13 +141,6 @@ int cpu_down(unsigned int cpu) goto out; } - /* - * Leave a trace in current->flags indicating we are already in - * process of performing CPU hotplug. Callers can check if cpucontrol - * is already acquired by current thread, and if so not cause - * a dead lock by not acquiring the lock - */ - current->flags |= PF_HOTPLUG_CPU; err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { @@ -171,7 +193,6 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out: - current->flags &= ~PF_HOTPLUG_CPU; unlock_cpu_hotplug(); return err; } @@ -182,7 +203,7 @@ int __devinit cpu_up(unsigned int cpu) int ret; void *hcpu = (void *)(long)cpu; - if ((ret = down_interruptible(&cpucontrol)) != 0) + if ((ret = lock_cpu_hotplug_interruptible()) != 0) return ret; if (cpu_online(cpu) || !cpu_present(cpu)) { @@ -190,11 +211,6 @@ int __devinit cpu_up(unsigned int cpu) goto out; } - /* - * Leave a trace in current->flags indicating we are already in - * process of performing CPU hotplug. - */ - current->flags |= PF_HOTPLUG_CPU; ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", @@ -217,7 +233,6 @@ out_notify: if (ret != 0) notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); out: - current->flags &= ~PF_HOTPLUG_CPU; - up(&cpucontrol); + unlock_cpu_hotplug(); return ret; } -- cgit v1.2.3 From ff88a3b2f56ae4f3296ea957ea38f99f8bd0e5a8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 28 Nov 2005 13:43:47 -0800 Subject: [PATCH] memory_sysdev_class is static So don't define it as extern in the header file. drivers/base/memory.c:28: error: static declaration of 'memory_sysdev_class' follows non-static declaration include/linux/memory.h:88: error: previous declaration of 'memory_sysdev_class' was here Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index 9a424383e6c6..dc4081b6f161 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -85,7 +85,6 @@ struct notifier_block; extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); -extern struct sysdev_class memory_sysdev_class; #endif /* CONFIG_MEMORY_HOTPLUG */ #define hotplug_memory_notifier(fn, pri) { \ -- cgit v1.2.3 From f7b7fd8f3ebbb2810d6893295aa984acd0fd30db Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 28 Nov 2005 13:44:07 -0800 Subject: [PATCH] temporarily disable swap token on memory pressure Some users (hi Zwane) have seen a problem when running a workload that eats nearly all of physical memory - the system does an OOM kill, even when there is still a lot of swap free.
From f7b7fd8f3ebbb2810d6893295aa984acd0fd30db Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Mon, 28 Nov 2005 13:44:07 -0800
Subject: [PATCH] temporarily disable swap token on memory pressure

Some users (hi Zwane) have seen a problem when running a workload that
eats nearly all of physical memory - the system does an OOM kill, even
when there is still a lot of swap free.

The problem appears to be a very big task that is holding the swap
token, and the VM has a very hard time finding any other page in the
system that is swappable.

Instead of ignoring the swap token when sc->priority reaches 0, we could
simply take the swap token away from the memory hog and make sure we
don't give it back to the memory hog for a few seconds.

This patch resolves the problem Zwane ran into.

Signed-off-by: Rik van Riel
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/rmap.h |  4 ++--
 include/linux/swap.h |  6 ++++++
 mm/rmap.c            | 26 ++++++++++----------------
 mm/thrash.c          | 10 +++++++---
 mm/vmscan.c          | 11 +++++++++--
 5 files changed, 34 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 35b30e6c8cf8..33261f1d2239 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -89,7 +89,7 @@ static inline void page_dup_rmap(struct page *page)
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked, int ignore_token);
+int page_referenced(struct page *, int is_locked);
 int try_to_unmap(struct page *);
 
 /*
@@ -109,7 +109,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define anon_vma_prepare(vma)	(0)
 #define anon_vma_link(vma)	do {} while (0)
 
-#define page_referenced(page,l,i) TestClearPageReferenced(page)
+#define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page)	SWAP_FAIL
 
 #endif	/* CONFIG_MMU */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 20c975642cab..508668f840b6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -239,6 +239,11 @@ static inline void put_swap_token(struct mm_struct *mm)
 		__put_swap_token(mm);
 }
 
+static inline void disable_swap_token(void)
+{
+	put_swap_token(swap_token_mm);
+}
+
 #else /* CONFIG_SWAP */
 
 #define total_swap_pages		0
@@ -283,6 +288,7 @@ static inline swp_entry_t get_swap_page(void)
 #define put_swap_token(x)		do { } while(0)
 #define grab_swap_token()		do { } while(0)
 #define has_swap_token(x)		0
+#define disable_swap_token()		do { } while(0)
 
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
diff --git a/mm/rmap.c b/mm/rmap.c
index 6389cda02a20..491ac350048f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -290,7 +290,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
 static int page_referenced_one(struct page *page,
-	struct vm_area_struct *vma, unsigned int *mapcount, int ignore_token)
+	struct vm_area_struct *vma, unsigned int *mapcount)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -311,7 +311,7 @@ static int page_referenced_one(struct page *page,
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault.
 	 */
-	if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
+	if (mm != current->mm && has_swap_token(mm) &&
 			rwsem_is_locked(&mm->mmap_sem))
 		referenced++;
 
@@ -321,7 +321,7 @@ out:
 	return referenced;
 }
 
-static int page_referenced_anon(struct page *page, int ignore_token)
+static int page_referenced_anon(struct page *page)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
@@ -334,8 +334,7 @@ static int page_referenced_anon(struct page *page, int ignore_token)
 
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		referenced += page_referenced_one(page, vma, &mapcount,
-							ignore_token);
+		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
 	}
@@ -354,7 +353,7 @@ static int page_referenced_anon(struct page *page, int ignore_token)
  *
  * This function is only called from page_referenced for object-based pages.
  */
-static int page_referenced_file(struct page *page, int ignore_token)
+static int page_referenced_file(struct page *page)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
@@ -392,8 +391,7 @@ static int page_referenced_file(struct page *page, int ignore_token)
 			referenced++;
 			break;
 		}
-		referenced += page_referenced_one(page, vma, &mapcount,
-							ignore_token);
+		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
 			break;
 	}
@@ -410,13 +408,10 @@ static int page_referenced_file(struct page *page, int ignore_token)
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
  */
-int page_referenced(struct page *page, int is_locked, int ignore_token)
+int page_referenced(struct page *page, int is_locked)
 {
 	int referenced = 0;
 
-	if (!swap_token_default_timeout)
-		ignore_token = 1;
-
 	if (page_test_and_clear_young(page))
 		referenced++;
 
@@ -425,15 +420,14 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 
 	if (page_mapped(page) && page->mapping) {
 		if (PageAnon(page))
-			referenced += page_referenced_anon(page, ignore_token);
+			referenced += page_referenced_anon(page);
 		else if (is_locked)
-			referenced += page_referenced_file(page, ignore_token);
+			referenced += page_referenced_file(page);
 		else if (TestSetPageLocked(page))
 			referenced++;
 		else {
 			if (page->mapping)
-				referenced += page_referenced_file(page,
-								ignore_token);
+				referenced += page_referenced_file(page);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/thrash.c b/mm/thrash.c
index eff3c18c33a1..f4c560b4a2b7 100644
--- a/mm/thrash.c
+++ b/mm/thrash.c
@@ -57,14 +57,17 @@ void grab_swap_token(void)
 	/* We have the token. Let others know we still need it. */
 	if (has_swap_token(current->mm)) {
 		current->mm->recent_pagein = 1;
+		if (unlikely(!swap_token_default_timeout))
+			disable_swap_token();
 		return;
 	}
 
 	if (time_after(jiffies, swap_token_check)) {
 
-		/* Can't get swapout protection if we exceed our RSS limit. */
-		// if (current->mm->rss > current->mm->rlimit_rss)
-		//	return;
+		if (!swap_token_default_timeout) {
+			swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
+			return;
+		}
 
 		/* ... or if we recently held the token.
 		 */
 		if (time_before(jiffies, current->mm->swap_token_time))
@@ -95,6 +98,7 @@ void __put_swap_token(struct mm_struct *mm)
 {
 	spin_lock(&swap_token_lock);
 	if (likely(mm == swap_token_mm)) {
+		mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
 		swap_token_mm = &init_mm;
 		swap_token_check = jiffies;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 28130541270f..078cf920208a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -407,7 +407,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		if (PageWriteback(page))
 			goto keep_locked;
 
-		referenced = page_referenced(page, 1, sc->priority <= 0);
+		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable?  Activate it. */
 		if (referenced && page_mapping_inuse(page))
 			goto activate_locked;
@@ -756,7 +756,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 		if (page_mapped(page)) {
 			if (!reclaim_mapped ||
 			    (total_swap_pages == 0 && PageAnon(page)) ||
-			    page_referenced(page, 0, sc->priority <= 0)) {
+			    page_referenced(page, 0)) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}
@@ -960,6 +960,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		sc.nr_reclaimed = 0;
 		sc.priority = priority;
 		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+		if (!priority)
+			disable_swap_token();
 		shrink_caches(zones, &sc);
 		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
 		if (reclaim_state) {
@@ -1056,6 +1058,10 @@ loop_again:
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
 
+		/* The swap token gets in the way of swapout... */
+		if (!priority)
+			disable_swap_token();
+
 		all_zones_ok = 1;
 
 		if (nr_pages == 0) {
@@ -1360,6 +1366,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	sc.nr_reclaimed = 0;
 	/* scan at the highest priority */
 	sc.priority = 0;
+	disable_swap_token();
 
 	if (nr_pages > SWAP_CLUSTER_MAX)
 		sc.swap_cluster_max = nr_pages;
-- 
cgit v1.2.3
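The fix has two cooperating halves, both visible in the hunks above: reclaim revokes the token whenever a scan reaches priority 0, and __put_swap_token timestamps the hand-back so grab_swap_token will not return the token to the same mm until SWAP_TOKEN_CHECK_INTERVAL has passed. Below is a condensed, compile-alone sketch of the reclaim half; scan_ctl, reclaim_pass, and the stubbed token state are invented for illustration, while the real logic lives in mm/vmscan.c and mm/thrash.c.

	/* Invented sketch of the "disable the token under pressure" half
	 * of this patch.  Nothing here is kernel code. */
	static int token_held = 1;	/* stands in for swap_token_mm */

	static void disable_swap_token_sketch(void)
	{
		token_held = 0;		/* real code: put_swap_token(swap_token_mm) */
	}

	struct scan_ctl {
		int priority;		/* DEF_PRIORITY down to 0; 0 = desperate */
	};

	static void reclaim_pass(struct scan_ctl *sc)
	{
		if (!sc->priority)	/* most desperate pass: */
			disable_swap_token_sketch();	/* nobody keeps swapout immunity */

		/* ...normal page aging follows; with the token gone, the
		 * former holder's pages stop being "pretend referenced"
		 * and can be reclaimed like anyone else's... */
	}

The timestamp half is what makes the revocation stick: without it, the memory hog would simply grab the token back on its next page fault and regain its swapout immunity while reclaim was still struggling.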