Diffstat (limited to 'include')
 include/asm-generic/io.h            |   2
 include/asm-generic/tlb.h           |  68
 include/drm/gpu_scheduler.h         |   4
 include/linux/cpuhotplug.h          |   1
 include/linux/fanotify.h            |  14
 include/linux/fsnotify_backend.h    |  89
 include/linux/mm.h                  |  14
 include/linux/stmmac.h              |   1
 include/net/addrconf.h              |   3
 include/net/amt.h                   |  20
 include/net/bluetooth/l2cap.h       |   1
 include/net/inet_connection_sock.h  |  10
 include/net/inet_hashtables.h       |   2
 include/net/inet_sock.h             |   9
 include/net/ip.h                    |   6
 include/net/protocol.h              |   4
 include/net/route.h                 |   2
 include/net/sock.h                  |   8
 include/net/tcp.h                   |  22
 include/net/udp.h                   |   4
 include/trace/events/dlm.h          | 118
 include/uapi/asm-generic/fcntl.h    |   2
 include/uapi/linux/fanotify.h       |   8
 include/uapi/linux/kvm.h            |   2
24 files changed, 301 insertions, 113 deletions
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 7ce93aaf69f8..98954dda5734 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
 }
 #endif
 
-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
 extern int devmem_is_allowed(unsigned long pfn);
-#endif
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index ff3e82553a76..492dce43236e 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -158,9 +158,24 @@
  * Useful if your architecture doesn't use IPIs for remote TLB invalidates
  * and therefore doesn't naturally serialize with software page-table walkers.
  *
+ * MMU_GATHER_NO_FLUSH_CACHE
+ *
+ *  Indicates the architecture has flush_cache_range() but it needs *NOT* be called
+ *  before unmapping a VMA.
+ *
+ *  NOTE: strictly speaking we shouldn't have this knob and instead rely on
+ *	  flush_cache_range() being a NOP, except Sparc64 seems to be
+ *	  different here.
+ *
+ * MMU_GATHER_MERGE_VMAS
+ *
+ *  Indicates the architecture wants to merge ranges over VMAs; typical when
+ *  multiple range invalidates are more expensive than a full invalidate.
+ *
  * MMU_GATHER_NO_RANGE
  *
- * Use this if your architecture lacks an efficient flush_tlb_range().
+ * Use this if your architecture lacks an efficient flush_tlb_range(). This
+ * option implies MMU_GATHER_MERGE_VMAS above.
  *
  * MMU_GATHER_NO_GATHER
  *
@@ -288,6 +303,7 @@ struct mmu_gather {
 	 */
 	unsigned int		vma_exec : 1;
 	unsigned int		vma_huge : 1;
+	unsigned int		vma_pfn  : 1;
 
 	unsigned int		batch_count;
 
@@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
 
 #ifdef CONFIG_MMU_GATHER_NO_RANGE
 
-#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
-#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#if defined(tlb_flush)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
 #endif
 
 /*
@@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 		flush_tlb_mm(tlb->mm);
 }
 
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#define tlb_end_vma tlb_end_vma
-static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
 #else /* CONFIG_MMU_GATHER_NO_RANGE */
 
 #ifndef tlb_flush
-
-#if defined(tlb_start_vma) || defined(tlb_end_vma)
-#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
-#endif
-
 /*
  * When an architecture does not provide its own tlb_flush() implementation
  * but does have a reasonably efficient flush_vma_range() implementation
@@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 		flush_tlb_range(&vma, tlb->start, tlb->end);
 	}
 }
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
 
 static inline void
 tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
@@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
 	 */
 	tlb->vma_huge = is_vm_hugetlb_page(vma);
 	tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+	tlb->vma_pfn  = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
 }
 
-#else
-
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#endif
-
-#endif /* CONFIG_MMU_GATHER_NO_RANGE */
-
 static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
 	/*
@@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
  * case where we're doing a full MM flush.  When we're doing a munmap,
  * the vmas are adjusted to only cover the region to be torn down.
  */
-#ifndef tlb_start_vma
 static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
 	if (tlb->fullmm)
 		return;
 
 	tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
 	flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
 #endif
+}
 
-#ifndef tlb_end_vma
 static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
 	if (tlb->fullmm)
 		return;
 
 	/*
-	 * Do a TLB flush and reset the range at VMA boundaries; this avoids
-	 * the ranges growing with the unused space between consecutive VMAs,
-	 * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
-	 * this.
+	 * VM_PFNMAP is more fragile because the core mm will not track the
+	 * page mapcount -- there might not be page-frames for these PFNs after
+	 * all. Force flush TLBs for such ranges to avoid munmap() vs
+	 * unmap_mapping_range() races.
 	 */
-	tlb_flush_mmu_tlbonly(tlb);
+	if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
+		/*
+		 * Do a TLB flush and reset the range at VMA boundaries; this avoids
+		 * the ranges growing with the unused space between consecutive VMAs.
+		 */
+		tlb_flush_mmu_tlbonly(tlb);
+	}
 }
-#endif
 
 /*
  * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
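The new tlb_end_vma() above compresses the policy into one condition. A standalone model of that flush decision (a sketch only: config_mmu_gather_merge_vmas and must_flush_at_vma_end are illustrative stand-ins, not kernel symbols):

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS) on an arch
     * that opted in to merging ranges across VMAs. */
    static const bool config_mmu_gather_merge_vmas = true;

    /* Mirrors the new tlb_end_vma(): a TLB flush at the VMA boundary is
     * forced for VM_PFNMAP/VM_MIXEDMAP vmas (tlb->vma_pfn), because the
     * core mm does not track mapcounts for such pages; otherwise it is
     * skipped when the arch merges ranges over VMAs. */
    static bool must_flush_at_vma_end(bool vma_pfn)
    {
        return vma_pfn || !config_mmu_gather_merge_vmas;
    }

    int main(void)
    {
        printf("normal vma: flush at end = %d\n", must_flush_at_vma_end(false));
        printf("pfnmap vma: flush at end = %d\n", must_flush_at_vma_end(true));
        return 0;
    }

With MMU_GATHER_MERGE_VMAS selected, only PFN-mapped VMAs still force a flush at the VMA boundary, which is exactly the munmap() vs unmap_mapping_range() race the comment describes.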
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0fca8f38bee4..addb135eeea6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -28,7 +28,7 @@
 #include <linux/dma-fence.h>
 #include <linux/completion.h>
 #include <linux/xarray.h>
-#include <linux/irq_work.h>
+#include <linux/workqueue.h>
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -295,7 +295,7 @@ struct drm_sched_job {
 	 */
 	union {
 		struct dma_fence_cb		finish_cb;
-		struct irq_work			work;
+		struct work_struct		work;
 	};
 
 	uint64_t			id;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 19f0dbfdd7fe..b66c5f389159 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -130,7 +130,6 @@ enum cpuhp_state {
 	CPUHP_ZCOMP_PREPARE,
 	CPUHP_TIMERS_PREPARE,
 	CPUHP_MIPS_SOC_PREPARE,
-	CPUHP_LOONGARCH_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END	= CPUHP_BP_PREPARE_DYN + 20,
 	CPUHP_BRINGUP_CPU,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index e517dbcf74ed..8ad743def6f3 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -59,15 +59,19 @@
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
 				 FAN_MARK_FILESYSTEM)
 
+#define FANOTIFY_MARK_CMD_BITS	(FAN_MARK_ADD | FAN_MARK_REMOVE | \
+				 FAN_MARK_FLUSH)
+
+#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
+				   FAN_MARK_IGNORE)
+
 #define FANOTIFY_MARK_FLAGS	(FANOTIFY_MARK_TYPE_BITS | \
-				 FAN_MARK_ADD | \
-				 FAN_MARK_REMOVE | \
+				 FANOTIFY_MARK_CMD_BITS | \
+				 FANOTIFY_MARK_IGNORE_BITS | \
 				 FAN_MARK_DONT_FOLLOW | \
 				 FAN_MARK_ONLYDIR | \
-				 FAN_MARK_IGNORED_MASK | \
 				 FAN_MARK_IGNORED_SURV_MODIFY | \
-				 FAN_MARK_EVICTABLE | \
-				 FAN_MARK_FLUSH)
+				 FAN_MARK_EVICTABLE)
 
 /*
  * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
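The fanotify.h hunk only regroups existing bits, but the groups make validation read as set algebra. A hedged sketch of checking a caller-supplied flags word against them (illustrative only, not the kernel's actual fanotify_mark() validation; flag values copied from the uapi header):

    #include <stdbool.h>

    #define FAN_MARK_ADD           0x00000001
    #define FAN_MARK_REMOVE        0x00000002
    #define FAN_MARK_IGNORED_MASK  0x00000020
    #define FAN_MARK_FLUSH         0x00000080
    #define FAN_MARK_IGNORE        0x00000400

    #define FANOTIFY_MARK_CMD_BITS    (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)
    #define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | FAN_MARK_IGNORE)

    /* Exactly one command bit, and at most one of the two "ignore" bits
     * (the uapi header documents them as mutually exclusive). */
    static bool mark_flags_valid(unsigned int flags)
    {
        unsigned int cmd = flags & FANOTIFY_MARK_CMD_BITS;
        unsigned int ign = flags & FANOTIFY_MARK_IGNORE_BITS;

        if (!cmd || (cmd & (cmd - 1)))  /* zero or more than one command */
            return false;
        return ign != FANOTIFY_MARK_IGNORE_BITS;
    }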
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 9560734759fa..d7d96c806bff 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -518,8 +518,8 @@ struct fsnotify_mark {
 	struct hlist_node obj_list;
 	/* Head of list of marks for an object [mark ref] */
 	struct fsnotify_mark_connector *connector;
-	/* Events types to ignore [mark->lock, group->mark_mutex] */
-	__u32 ignored_mask;
+	/* Events types and flags to ignore [mark->lock, group->mark_mutex] */
+	__u32 ignore_mask;
 	/* General fsnotify mark flags */
 #define FSNOTIFY_MARK_FLAG_ALIVE	0x0001
 #define FSNOTIFY_MARK_FLAG_ATTACHED	0x0002
@@ -529,6 +529,7 @@ struct fsnotify_mark {
 	/* fanotify mark flags */
 #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY	0x0100
 #define FSNOTIFY_MARK_FLAG_NO_IREF		0x0200
+#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS	0x0400
 	unsigned int flags;		/* flags [mark->lock] */
 };
 
@@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
 
 /* functions used to manipulate the marks attached to inodes */
 
-/* Get mask for calculating object interest taking ignored mask into account */
+/*
+ * Canonical "ignore mask" including event flags.
+ *
+ * Note the subtle semantic difference from the legacy ->ignored_mask.
+ * ->ignored_mask traditionally only meant which events should be ignored,
+ * while ->ignore_mask also includes flags regarding the type of objects on
+ * which events should be ignored.
+ */
+static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
+{
+	__u32 ignore_mask = mark->ignore_mask;
+
+	/* The event flags in ignore mask take effect */
+	if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
+		return ignore_mask;
+
+	/*
+	 * Legacy behavior:
+	 * - Always ignore events on dir
+	 * - Ignore events on child if parent is watching children
+	 */
+	ignore_mask |= FS_ISDIR;
+	ignore_mask &= ~FS_EVENT_ON_CHILD;
+	ignore_mask |= mark->mask & FS_EVENT_ON_CHILD;
+
+	return ignore_mask;
+}
+
+/* Legacy ignored_mask - only event types to ignore */
+static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
+{
+	return mark->ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/*
+ * Check if mask (or ignore mask) should be applied depending if victim is a
+ * directory and whether it is reported to a watching parent.
+ */
+static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
+					    int iter_type)
+{
+	/* Should mask be applied to a directory? */
+	if (is_dir && !(mask & FS_ISDIR))
+		return false;
+
+	/* Should mask be applied to a child? */
+	if (iter_type == FSNOTIFY_ITER_TYPE_PARENT &&
+	    !(mask & FS_EVENT_ON_CHILD))
+		return false;
+
+	return true;
+}
+
+/*
+ * Effective ignore mask taking into account if event victim is a
+ * directory and whether it is reported to a watching parent.
+ */
+static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
+						   bool is_dir, int iter_type)
+{
+	__u32 ignore_mask = fsnotify_ignored_events(mark);
+
+	if (!ignore_mask)
+		return 0;
+
+	/* For non-dir and non-child, no need to consult the event flags */
+	if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT)
+		return ignore_mask;
+
+	ignore_mask = fsnotify_ignore_mask(mark);
+	if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type))
+		return 0;
+
+	return ignore_mask & ALL_FSNOTIFY_EVENTS;
+}
+
+/* Get mask for calculating object interest taking ignore mask into account */
 static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
 {
 	__u32 mask = mark->mask;
 
-	if (!mark->ignored_mask)
+	if (!fsnotify_ignored_events(mark))
 		return mask;
 
-	/* Interest in FS_MODIFY may be needed for clearing ignored mask */
+	/* Interest in FS_MODIFY may be needed for clearing ignore mask */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
 		mask |= FS_MODIFY;
 
@@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
 	 * If mark is interested in ignoring events on children, the object must
 	 * show interest in those events for fsnotify_parent() to notice it.
 	 */
-	return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
+	return mask | mark->ignore_mask;
 }
 
 /* Get mask of events for a list of marks */
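The legacy-vs-new ignore-mask semantics above can be modeled in userspace. A compilable sketch (bit values copied from fsnotify_backend.h; the helper mirrors fsnotify_ignore_mask(), everything else is illustrative):

    #include <stdio.h>

    #define FS_ISDIR              0x40000000
    #define FS_EVENT_ON_CHILD     0x08000000
    #define FLAG_HAS_IGNORE_FLAGS 0x0400  /* FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS */

    struct mark { unsigned int mask, ignore_mask, flags; };

    /* With the new flag, the stored event flags are authoritative; legacy
     * marks always ignore events on dirs and ignore on children only if
     * the mark itself watches children. */
    static unsigned int ignore_mask(const struct mark *m)
    {
        unsigned int ign = m->ignore_mask;

        if (m->flags & FLAG_HAS_IGNORE_FLAGS)
            return ign;

        ign |= FS_ISDIR;
        ign &= ~FS_EVENT_ON_CHILD;
        ign |= m->mask & FS_EVENT_ON_CHILD;
        return ign;
    }

    int main(void)
    {
        struct mark legacy = { .mask = FS_EVENT_ON_CHILD, .ignore_mask = 0x1 };
        printf("legacy ignore mask: %#x\n", ignore_mask(&legacy));
        return 0;
    }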
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cf3d0d673f6b..7898e29bcfb5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(const struct page *page)
 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 
-bool __put_devmap_managed_page(struct page *page);
-static inline bool put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs);
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
 {
 	if (!static_branch_unlikely(&devmap_managed_key))
 		return false;
 	if (!is_zone_device_page(page))
 		return false;
-	return __put_devmap_managed_page(page);
+	return __put_devmap_managed_page_refs(page, refs);
 }
-
 #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
 {
 	return false;
 }
 #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
 
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	return put_devmap_managed_page_refs(page, 1);
+}
+
 /* 127: arbitrary random number, small enough to assemble well */
 #define folio_ref_zero_or_close_to_overflow(folio)	\
 	((unsigned int) folio_ref_count(folio) + 127u <= 127u)
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 29917850f079..8df475db88c0 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -260,6 +260,7 @@ struct plat_stmmacenet_data {
 	bool has_crossts;
 	int int_snapshot_num;
 	int ext_snapshot_num;
+	bool int_snapshot_en;
 	bool ext_snapshot_en;
 	bool multi_msi_en;
 	int msi_mac_vec;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f7506f08e505..c04f359655b8 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
 {
 	const struct inet6_dev *idev = __in6_dev_get(dev);
 
+	if (unlikely(!idev))
+		return true;
+
 	return !!idev->cnf.ignore_routes_with_linkdown;
 }
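The mm.h hunk is a common refactoring shape: widen a primitive with a refs count and keep the old name as a one-line wrapper, so existing callers compile unchanged. The same shape in miniature (illustrative names, not the kernel API):

    #include <stdbool.h>

    struct page;

    /* new, more general primitive taking an explicit reference count */
    bool put_managed_page_refs(struct page *page, int refs);

    /* old entry point preserved as a trivial wrapper */
    static inline bool put_managed_page(struct page *page)
    {
        return put_managed_page_refs(page, 1);
    }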
diff --git a/include/net/amt.h b/include/net/amt.h
index 0e40c3d64fcf..08fc30cf2f34 100644
--- a/include/net/amt.h
+++ b/include/net/amt.h
@@ -78,6 +78,15 @@ enum amt_status {
 
 #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
 
+/* Gateway events only */
+enum amt_event {
+	AMT_EVENT_NONE,
+	AMT_EVENT_RECEIVE,
+	AMT_EVENT_SEND_DISCOVERY,
+	AMT_EVENT_SEND_REQUEST,
+	__AMT_EVENT_MAX,
+};
+
 struct amt_header {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u8 type:4,
@@ -292,6 +301,12 @@ struct amt_group_node {
 	struct hlist_head	sources[];
 };
 
+#define AMT_MAX_EVENTS	16
+struct amt_events {
+	enum amt_event event;
+	struct sk_buff *skb;
+};
+
 struct amt_dev {
 	struct net_device       *dev;
 	struct net_device       *stream_dev;
@@ -308,6 +323,7 @@ struct amt_dev {
 	struct delayed_work	req_wq;
 	/* Protected by RTNL */
 	struct delayed_work	secret_wq;
+	struct work_struct	event_wq;
 	/* AMT status */
 	enum amt_status		status;
 	/* Generated key */
@@ -345,6 +361,10 @@ struct amt_dev {
 	/* Used only in gateway mode */
 	u64			mac:48,
 				reserved:16;
+	/* AMT gateway side message handler queue */
+	struct amt_events	events[AMT_MAX_EVENTS];
+	u8			event_idx;
+	u8			nr_events;
 };
 
 #define AMT_TOS			0xc0
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3c4f550e5a8b..2f766e3437ce 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -847,6 +847,7 @@ enum {
 };
 
 void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
 void l2cap_chan_put(struct l2cap_chan *c);
 
 static inline void l2cap_chan_lock(struct l2cap_chan *chan)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 85cd695e7fd1..ee88f0f1350f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -321,7 +321,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
 
 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
 
-#define TCP_PINGPONG_THRESH	3
+#define TCP_PINGPONG_THRESH	1
 
 static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
 {
@@ -338,14 +338,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
 	return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
 }
 
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	if (icsk->icsk_ack.pingpong < U8_MAX)
-		icsk->icsk_ack.pingpong++;
-}
-
 static inline bool inet_csk_has_ulp(struct sock *sk)
 {
 	return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index ebfa3df6f8dc..fd6b510d114b 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
 					int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+	return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
				 bound_dev_if, dif, sdif);
 #else
 	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
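l2cap_chan_hold_unless_zero() follows the kref_get_unless_zero() pattern: a lookup path may take a reference only if the count has not already dropped to zero, otherwise the channel is mid-teardown and must not be touched. A C11 model of that rule (a sketch, not the Bluetooth code):

    #include <stdatomic.h>
    #include <stdbool.h>

    static bool hold_unless_zero(atomic_int *refcnt)
    {
        int old = atomic_load(refcnt);

        while (old != 0) {
            /* increment only if the count is still the nonzero value we saw;
             * on failure, old is reloaded and the zero check runs again */
            if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                return true;   /* reference taken */
        }
        return false;          /* already zero: object is being freed */
    }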
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index daead5fb389a..6395f6b9a5d2 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 
 static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+	if (!sk->sk_mark &&
+	    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
 		return skb->mark;
 
 	return sk->sk_mark;
@@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	struct net *net = sock_net(sk);
 
-	if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+	if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
 		return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
 #endif
 
@@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	struct net *net = sock_net(sk);
 
-	if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+	if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
 		return l3mdev_master_ifindex_by_index(net,
						      sk->sk_bound_dev_if);
 #endif
@@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk)
 static inline bool inet_can_nonlocal_bind(struct net *net,
					  struct inet_sock *inet)
 {
-	return net->ipv4.sysctl_ip_nonlocal_bind ||
+	return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
		inet->freebind || inet->transparent;
 }
diff --git a/include/net/ip.h b/include/net/ip.h
index 26fffda78cca..1c979fd1904c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name)
 
 static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
 {
-	return port < net->ipv4.sysctl_ip_prot_sock;
+	return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
 }
 
 #else
@@ -384,7 +384,7 @@ void ipfrag_init(void);
 void ip_static_sysctl_init(void);
 
 #define IP4_REPLY_MARK(net, mark) \
-	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+	(READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
 
 static inline bool ip_is_fragment(const struct iphdr *iph)
 {
@@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 	struct net *net = dev_net(dst->dev);
 	unsigned int mtu;
 
-	if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+	if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
	    ip_mtu_locked(dst) ||
	    !forwarding) {
		mtu = rt->rt_pmtu;
diff --git a/include/net/protocol.h b/include/net/protocol.h
index f51c06ae365f..6aef8cb11cc8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -35,8 +35,6 @@
 
 /* This is used to register protocols. */
 struct net_protocol {
-	int			(*early_demux)(struct sk_buff *skb);
-	int			(*early_demux_handler)(struct sk_buff *skb);
 	int			(*handler)(struct sk_buff *skb);
 
 	/* This returns an error if we weren't able to handle the error. */
@@ -52,8 +50,6 @@ struct net_protocol {
 
 #if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
-	void	(*early_demux)(struct sk_buff *skb);
-	void	(*early_demux_handler)(struct sk_buff *skb);
 	int	(*handler)(struct sk_buff *skb);
 
 	/* This returns an error if we weren't able to handle the error. */
diff --git a/include/net/route.h b/include/net/route.h
index 991a3985712d..bbcf2aba149f 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
 	struct net *net = dev_net(dst->dev);
 
 	if (hoplimit == 0)
-		hoplimit = net->ipv4.sysctl_ip_default_ttl;
+		hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
 	return hoplimit;
 }
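Most of the networking hunks in this patch are one repeated fix: these sysctls are written by the proc handler while being read locklessly on the packet path, so every lockless read gains READ_ONCE() (paired with WRITE_ONCE() on the write side) to keep the compiler from tearing, fusing, or re-loading the value. In portable C11 terms the pairing behaves roughly like a relaxed atomic (model only; names are illustrative):

    #include <stdatomic.h>

    static _Atomic int sysctl_ttl = 64;  /* stand-in for net->ipv4.sysctl_ip_default_ttl */

    /* sysctl write path (proc handler) ~ WRITE_ONCE() */
    void ttl_sysctl_write(int val)
    {
        atomic_store_explicit(&sysctl_ttl, val, memory_order_relaxed);
    }

    /* lockless fast path ~ READ_ONCE(): one clean load, no tearing */
    int ttl_read(void)
    {
        return atomic_load_explicit(&sysctl_ttl, memory_order_relaxed);
    }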
diff --git a/include/net/sock.h b/include/net/sock.h
index 9fa54762e077..7a48991cdb19 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2843,18 +2843,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
 {
 	/* Does this proto have per netns sysctl_wmem ? */
 	if (proto->sysctl_wmem_offset)
-		return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
 
-	return *proto->sysctl_wmem;
+	return READ_ONCE(*proto->sysctl_wmem);
 }
 
 static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
 {
 	/* Does this proto have per netns sysctl_rmem ? */
 	if (proto->sysctl_rmem_offset)
-		return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+		return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
 
-	return *proto->sysctl_rmem;
+	return READ_ONCE(*proto->sysctl_rmem);
 }
 
 /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e99f5c61f84..78a64e1b33a7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific;
 
 INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
 
 #endif
 
@@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta;
 
-	if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
-	    ca_ops->cong_control)
+	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+	    tp->packets_out || ca_ops->cong_control)
		return;
	delta = tcp_jiffies32 - tp->lsndtime;
	if (delta > inet_csk(sk)->icsk_rto)
@@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
 
 static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
 
	return tcp_adv_win_scale <= 0 ?
		(space>>(-tcp_adv_win_scale)) :
@@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
 	struct net *net = sock_net((struct sock *)tp);
 
-	return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+	return tp->keepalive_intvl ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }
 
 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
 	struct net *net = sock_net((struct sock *)tp);
 
-	return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+	return tp->keepalive_time ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }
 
 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
 	struct net *net = sock_net((struct sock *)tp);
 
-	return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+	return tp->keepalive_probes ? :
+		READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }
 
 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
 
 static inline int tcp_fin_time(const struct sock *sk)
 {
-	int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+	int fin_timeout = tcp_sk(sk)->linger2 ? :
+		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
 	const int rto = inet_csk(sk)->icsk_rto;
 
	if (fin_timeout < (rto << 2) - (rto >> 1))
@@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
 	struct net *net = sock_net((struct sock *)tp);
-	return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+	return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }
 
 bool tcp_stream_memory_free(const struct sock *sk, int wake);
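The keepalive helpers above lean on the GNU C "a ?: b" extension, which yields a when a is nonzero (evaluating it once) and b otherwise — here, the per-socket override if set, else the netns sysctl default. Compiles with GCC and Clang:

    /* a ?: b  ==  a ? a : b, with a evaluated only once (GNU C extension) */
    static inline int effective_keepalive(int per_socket, int netns_default)
    {
        return per_socket ? : netns_default;
    }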
diff --git a/include/net/udp.h b/include/net/udp.h
index b83a00330566..8dd4aa1485a6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
				     __be16 dport);
 
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
 INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if,
				       int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-	return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+	return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
				 bound_dev_if, dif, sdif);
 #else
 	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
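tcp_v6_early_demux() and udp_v6_early_demux() lose their INDIRECT_CALLABLE_DECLARE() wrappers because, together with the protocol.h hunk above, early demux is no longer reached through struct-member function pointers; the input path can call the helpers directly, roughly like this (a sketch, not the exact upstream dispatch):

    /* sketch: direct dispatch on the protocol number replaces the
     * removed ->early_demux function pointers */
    switch (ipv6_hdr(skb)->nexthdr) {
    case IPPROTO_TCP:
        if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
            tcp_v6_early_demux(skb);
        break;
    case IPPROTO_UDP:
        if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
            udp_v6_early_demux(skb);
        break;
    }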
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index 32088c603244..bad21222130e 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -49,38 +49,52 @@
 /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
 TRACE_EVENT(dlm_lock_start,
 
-	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
-		 __u32 flags),
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+		 unsigned int namelen, int mode, __u32 flags),
 
-	TP_ARGS(ls, lkb, mode, flags),
+	TP_ARGS(ls, lkb, name, namelen, mode, flags),
 
	TP_STRUCT__entry(
		__field(__u32, ls_id)
		__field(__u32, lkb_id)
		__field(int, mode)
		__field(__u32, flags)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
		__entry->mode = mode;
		__entry->flags = flags;
+
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
+		else if (name)
+			memcpy(__get_dynamic_array(res_name), name,
+			       __get_dynamic_array_len(res_name));
	),
 
-	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s res_name=%s",
		  __entry->ls_id, __entry->lkb_id,
		  show_lock_mode(__entry->mode),
-		  show_lock_flags(__entry->flags))
+		  show_lock_flags(__entry->flags),
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
 
 TRACE_EVENT(dlm_lock_end,
 
-	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
-		 int error),
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, void *name,
+		 unsigned int namelen, int mode, __u32 flags, int error),
 
-	TP_ARGS(ls, lkb, mode, flags, error),
+	TP_ARGS(ls, lkb, name, namelen, mode, flags, error),
 
	TP_STRUCT__entry(
		__field(__u32, ls_id)
@@ -88,14 +102,26 @@ TRACE_EVENT(dlm_lock_end,
		__field(int, mode)
		__field(__u32, flags)
		__field(int, error)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : namelen)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
		__entry->mode = mode;
		__entry->flags = flags;
 
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
+		else if (name)
+			memcpy(__get_dynamic_array(res_name), name,
+			       __get_dynamic_array_len(res_name));
+
		/* return value will be zeroed in those cases by dlm_lock()
		 * we do it here again to not introduce more overhead if
		 * trace isn't running and error reflects the return value.
@@ -104,12 +130,15 @@ TRACE_EVENT(dlm_lock_end,
			__entry->error = 0;
		else
			__entry->error = error;
+
	),
 
-	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d res_name=%s",
		  __entry->ls_id, __entry->lkb_id,
		  show_lock_mode(__entry->mode),
-		  show_lock_flags(__entry->flags), __entry->error)
+		  show_lock_flags(__entry->flags), __entry->error,
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
 
@@ -123,42 +152,65 @@ TRACE_EVENT(dlm_bast,
		__field(__u32, ls_id)
		__field(__u32, lkb_id)
		__field(int, mode)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
		__entry->mode = mode;
+
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
	),
 
-	TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
-		  __entry->lkb_id, show_lock_mode(__entry->mode))
+	TP_printk("ls_id=%u lkb_id=%x mode=%s res_name=%s",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_mode(__entry->mode),
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
 
 TRACE_EVENT(dlm_ast,
 
-	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb),
 
-	TP_ARGS(ls, lkb, lksb),
+	TP_ARGS(ls, lkb),
 
	TP_STRUCT__entry(
		__field(__u32, ls_id)
		__field(__u32, lkb_id)
		__field(u8, sb_flags)
		__field(int, sb_status)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
-		__entry->sb_flags = lksb->sb_flags;
-		__entry->sb_status = lksb->sb_status;
+		__entry->sb_flags = lkb->lkb_lksb->sb_flags;
+		__entry->sb_status = lkb->lkb_lksb->sb_status;
+
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
	),
 
-	TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
+	TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d res_name=%s",
		  __entry->ls_id, __entry->lkb_id,
-		  show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
+		  show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status,
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
 
@@ -173,17 +225,28 @@ TRACE_EVENT(dlm_unlock_start,
		__field(__u32, ls_id)
		__field(__u32, lkb_id)
		__field(__u32, flags)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
		__entry->flags = flags;
+
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
	),
 
-	TP_printk("ls_id=%u lkb_id=%x flags=%s",
+	TP_printk("ls_id=%u lkb_id=%x flags=%s res_name=%s",
		  __entry->ls_id, __entry->lkb_id,
-		  show_lock_flags(__entry->flags))
+		  show_lock_flags(__entry->flags),
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
 
@@ -199,18 +262,29 @@ TRACE_EVENT(dlm_unlock_end,
		__field(__u32, lkb_id)
		__field(__u32, flags)
		__field(int, error)
+		__dynamic_array(unsigned char, res_name,
+				lkb->lkb_resource ? lkb->lkb_resource->res_length : 0)
	),
 
	TP_fast_assign(
+		struct dlm_rsb *r;
+
		__entry->ls_id = ls->ls_global_id;
		__entry->lkb_id = lkb->lkb_id;
		__entry->flags = flags;
		__entry->error = error;
+
+		r = lkb->lkb_resource;
+		if (r)
+			memcpy(__get_dynamic_array(res_name), r->res_name,
+			       __get_dynamic_array_len(res_name));
	),
 
-	TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
+	TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d res_name=%s",
		  __entry->ls_id, __entry->lkb_id,
-		  show_lock_flags(__entry->flags), __entry->error)
+		  show_lock_flags(__entry->flags), __entry->error,
+		  __print_hex_str(__get_dynamic_array(res_name),
+				  __get_dynamic_array_len(res_name)))
 
 );
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index f13d37b60775..1ecdb911add8 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -192,6 +192,7 @@ struct f_owner_ex {
 
 #define F_LINUX_SPECIFIC_BASE	1024
 
+#ifndef HAVE_ARCH_STRUCT_FLOCK
 struct flock {
	short	l_type;
	short	l_whence;
@@ -216,5 +217,6 @@ struct flock64 {
	__ARCH_FLOCK64_PAD
 #endif
 };
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
 
 #endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index f1f89132d60e..d8536d77fea1 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -83,12 +83,20 @@
 #define FAN_MARK_FLUSH		0x00000080
 /* FAN_MARK_FILESYSTEM is	0x00000100 */
 #define FAN_MARK_EVICTABLE	0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE		0x00000400
 
 /* These are NOT bitwise flags.  Both bits can be used togther.  */
 #define FAN_MARK_INODE		0x00000000
 #define FAN_MARK_MOUNT		0x00000010
 #define FAN_MARK_FILESYSTEM	0x00000100
 
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV	(FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_MARK_FLAGS	(FAN_MARK_ADD |\
				 FAN_MARK_REMOVE |\
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 811897dadcae..860f867c50c0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2084,7 +2084,7 @@ struct kvm_stats_header {
 #define KVM_STATS_UNIT_SECONDS		(0x2 << KVM_STATS_UNIT_SHIFT)
 #define KVM_STATS_UNIT_CYCLES		(0x3 << KVM_STATS_UNIT_SHIFT)
 #define KVM_STATS_UNIT_BOOLEAN		(0x4 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_BOOLEAN
 
 #define KVM_STATS_BASE_SHIFT		8
 #define KVM_STATS_BASE_MASK		(0xF << KVM_STATS_BASE_SHIFT)
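A hedged userspace example of the new FAN_MARK_IGNORE_SURV flag from the fanotify uapi hunk: mark a directory so chosen events are ignored, with the ignore mask surviving modify events (the path is illustrative; the fallback defines cover pre-5.19 headers, and fanotify_init() needs CAP_SYS_ADMIN):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/fanotify.h>

    #ifndef FAN_MARK_IGNORE  /* fallback for pre-5.19 headers */
    #define FAN_MARK_IGNORE      0x00000400
    #define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
    #endif

    int main(void)
    {
        int fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY);

        if (fd < 0) {
            perror("fanotify_init");
            return 1;
        }
        /* ignore FAN_OPEN on /tmp (illustrative path); the ignore mask
         * survives modify events thanks to FAN_MARK_IGNORE_SURV */
        if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_IGNORE_SURV,
                          FAN_OPEN, AT_FDCWD, "/tmp") < 0) {
            perror("fanotify_mark");
            return 1;
        }
        return 0;
    }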
