From 1ce7e4ff24fe338438bc7837e02780f202bf202b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 23 Apr 2010 10:35:52 +0800 Subject: cgroup: Check task_lock in task_subsys_state() Expand task_subsys_state()'s rcu_dereference_check() to include the full locking rule as documented in Documentation/cgroups/cgroups.txt by adding a check for task->alloc_lock being held. This fixes an RCU false positive when resuming from suspend. The warning comes from freezer cgroup in cgroup_freezing_or_frozen(). Signed-off-by: Li Zefan Acked-by: Matt Helsley Signed-off-by: Paul E. McKenney --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b8ad1ea99586..8f78073d7caa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -530,6 +530,7 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || + lockdep_is_held(&task->alloc_lock) || cgroup_lock_is_held()); } -- cgit v1.2.3 From ee84b8243b07c33a5c8aed42b4b2da60cb16d1d2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 May 2010 09:28:41 -0700 Subject: rcu: create rcu_my_thread_group_empty() wrapper Some RCU-lockdep splat repairs need to know whether they are running in a single-threaded process. Unfortunately, the thread_group_empty() primitive is defined in sched.h, and can induce #include hell. This commit therefore introduces a rcu_my_thread_group_empty() wrapper that is defined in rcupdate.c, thus avoiding the need to include sched.h everywhere. Signed-off-by: "Paul E. McKenney" --- include/linux/rcupdate.h | 2 ++ kernel/rcupdate.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 07db2feb8572..db266bbed23f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -190,6 +190,8 @@ static inline int rcu_read_lock_sched_held(void) #ifdef CONFIG_PROVE_RCU +extern int rcu_my_thread_group_empty(void); + /** * rcu_dereference_check - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 03a7ea1579f6..49d808e833b0 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -122,3 +122,14 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } + +#ifdef CONFIG_PROVE_RCU +/* + * wrapper function to avoid #include problems. + */ +int rcu_my_thread_group_empty(void) +{ + return thread_group_empty(current); +} +EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); +#endif /* #ifdef CONFIG_PROVE_RCU */ -- cgit v1.2.3 From 34441427aab4bdb3069a4ffcda69a99357abcb2e Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Tue, 11 May 2010 14:06:46 -0700 Subject: revert "procfs: provide stack information for threads" and its fixup commits Originally, commit d899bf7b ("procfs: provide stack information for threads") attempted to introduce a new feature for showing where the threadstack was located and how many pages are being utilized by the stack. Commit c44972f1 ("procfs: disable per-task stack usage on NOMMU") was applied to fix the NO_MMU case. Commit 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on 64-bit") was applied to fix a bug in ia32 executables being loaded. Commit 9ebd4eba7 ("procfs: fix /proc//stat stack pointer for kernel threads") was applied to fix a bug which had kernel threads printing a userland stack address. Commit 1306d603f ('proc: partially revert "procfs: provide stack information for threads"') was then applied to revert the stack pages being used to solve a significant performance regression. This patch nearly undoes the effect of all these patches. The reason for reverting these is it provides an unusable value in field 28. For x86_64, a fork will result in the task->stack_start value being updated to the current user top of stack and not the stack start address. This unpredictability of the stack_start value makes it worthless. That includes the intended use of showing how much stack space a thread has. Other architectures will get different values. As an example, ia64 gets 0. The do_fork() and copy_process() functions appear to treat the stack_start and stack_size parameters as architecture specific. I only partially reverted c44972f1 ("procfs: disable per-task stack usage on NOMMU") . If I had completely reverted it, I would have had to change mm/Makefile only build pagewalk.o when CONFIG_PROC_PAGE_MONITOR is configured. Since I could not test the builds without significant effort, I decided to not change mm/Makefile. I only partially reverted 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on 64-bit") . I left the KSTK_ESP() change in place as that seemed worthwhile. Signed-off-by: Robin Holt Cc: Stefani Seibold Cc: KOSAKI Motohiro Cc: Michal Simek Cc: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 3 +-- fs/compat.c | 2 -- fs/exec.c | 2 -- fs/proc/array.c | 3 +-- fs/proc/task_mmu.c | 19 ------------------- include/linux/sched.h | 1 - kernel/fork.c | 2 -- 7 files changed, 2 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a4f30faa4f1f..1e359b62c40a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -316,7 +316,7 @@ address perms offset dev inode pathname 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] a7cb1000-a7cb2000 ---p 00000000 00:00 0 -a7cb2000-a7eb2000 rw-p 00000000 00:00 0 [threadstack:001ff4b4] +a7cb2000-a7eb2000 rw-p 00000000 00:00 0 a7eb2000-a7eb3000 ---p 00000000 00:00 0 a7eb3000-a7ed5000 rw-p 00000000 00:00 0 a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 @@ -352,7 +352,6 @@ is not associated with a file: [stack] = the stack of the main process [vdso] = the "virtual dynamic shared object", the kernel system call handler - [threadstack:xxxxxxxx] = the stack of the thread, xxxxxxxx is the stack size or if empty, the mapping is anonymous. diff --git a/fs/compat.c b/fs/compat.c index 4b6ed03cc478..05448730f840 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/exec.c b/fs/exec.c index 49cdaa19e5b9..e6e94c626c2c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1387,8 +1387,6 @@ int do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/proc/array.c b/fs/proc/array.c index e51f2ec2c5e5..885ab5513ac5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -81,7 +81,6 @@ #include #include #include -#include #include #include @@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - (permitted && mm) ? task->stack_start : 0, + (permitted && mm) ? mm->start_stack : 0, esp, eip, /* The signal information here is obsolete. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 070553427dd5..47f5b145f56e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { name = "[stack]"; - } else { - unsigned long stack_start; - struct proc_maps_private *pmp; - - pmp = m->private; - stack_start = pmp->task->stack_start; - - if (vma->vm_start <= stack_start && - vma->vm_end >= stack_start) { - pad_len_spaces(m, len); - seq_printf(m, - "[threadstack:%08lx]", -#ifdef CONFIG_STACK_GROWSUP - vma->vm_end - stack_start -#else - stack_start - vma->vm_start -#endif - ); - } } } else { name = "[vdso]"; diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f668ebf7..2b7b81df78b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1497,7 +1497,6 @@ struct task_struct { /* bitmask of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ - unsigned long stack_start; #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ diff --git a/kernel/fork.c b/kernel/fork.c index 44b0791b0a2e..4c14942a0ee3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1114,8 +1114,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->bts = NULL; - p->stack_start = stack_start; - /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); -- cgit v1.2.3 From d83c49f3e36cecd2e8823b6c48ffba083b8a5704 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Apr 2010 17:17:09 -0400 Subject: Fix the regression created by "set S_DEAD on unlink()..." commit 1) i_flags simply doesn't work for mount/unlink race prevention; we may have many links to file and rm on one of those obviously shouldn't prevent bind on top of another later on. To fix it right way we need to mark _dentry_ as unsuitable for mounting upon; new flag (DCACHE_CANT_MOUNT) is protected by d_flags and i_mutex on the inode in question. Set it (with dont_mount(dentry)) in unlink/rmdir/etc., check (with cant_mount(dentry)) in places in namespace.c that used to check for S_DEAD. Setting S_DEAD is still needed in places where we used to set it (for directories getting killed), since we rely on it for readdir/rmdir race prevention. 2) rename()/mount() protection has another bogosity - we unhash the target before we'd checked that it's not a mountpoint. Fixed. 3) ancient bogosity in pivot_root() - we locked i_mutex on the right directory, but checked S_DEAD on the different (and wrong) one. Noticed and fixed. Signed-off-by: Al Viro --- drivers/usb/core/inode.c | 1 + fs/configfs/dir.c | 4 ++++ fs/namei.c | 21 +++++++++++++-------- fs/namespace.c | 6 +++--- include/linux/dcache.h | 14 ++++++++++++++ 5 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 4a6366a42129..111a01a747fc 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -380,6 +380,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&inode->i_mutex); dentry_unhash(dentry); if (usbfs_empty(dentry)) { + dont_mount(dentry); drop_nlink(dentry->d_inode); drop_nlink(dentry->d_inode); dput(dentry); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 8e48b52205aa..0b502f80c691 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group) configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; + dont_mount(child); mutex_unlock(&child->d_inode->i_mutex); @@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item, mutex_lock(&dentry->d_inode->i_mutex); configfs_remove_dir(item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); } @@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item, if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); } configfs_adjust_dir_dirent_depth_after_populate(sd); mutex_unlock(&dentry->d_inode->i_mutex); @@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); diff --git a/fs/namei.c b/fs/namei.c index 16df7277a92e..b86b96fe1dc3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2176,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) error = security_inode_rmdir(dir, dentry); if (!error) { error = dir->i_op->rmdir(dir, dentry); - if (!error) + if (!error) { dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); + } } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -2261,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!error) { error = dir->i_op->unlink(dir, dentry); if (!error) - dentry->d_inode->i_flags |= S_DEAD; + dont_mount(dentry); } } mutex_unlock(&dentry->d_inode->i_mutex); @@ -2572,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, return error; target = new_dentry->d_inode; - if (target) { + if (target) mutex_lock(&target->i_mutex); - dentry_unhash(new_dentry); - } if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; - else + else { + if (target) + dentry_unhash(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + } if (target) { - if (!error) + if (!error) { target->i_flags |= S_DEAD; + dont_mount(new_dentry); + } mutex_unlock(&target->i_mutex); if (d_unhashed(new_dentry)) d_rehash(new_dentry); @@ -2614,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (!error) { if (target) - target->i_flags |= S_DEAD; + dont_mount(new_dentry); if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); } diff --git a/fs/namespace.c b/fs/namespace.c index 8174c8ab5c70..f20cb57d1067 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) err = -ENOENT; mutex_lock(&path->dentry->d_inode->i_mutex); - if (IS_DEADDIR(path->dentry->d_inode)) + if (cant_mount(path->dentry)) goto out_unlock; err = security_sb_check_sb(mnt, path); @@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name) err = -ENOENT; mutex_lock(&path->dentry->d_inode->i_mutex); - if (IS_DEADDIR(path->dentry->d_inode)) + if (cant_mount(path->dentry)) goto out1; if (d_unlinked(path->dentry)) @@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; - if (IS_DEADDIR(new.dentry->d_inode)) + if (cant_mount(old.dentry)) goto out2; if (d_unlinked(new.dentry)) goto out2; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 30b93b2a01a4..eebb617c17d8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -186,6 +186,8 @@ d_iput: no no no yes #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ +#define DCACHE_CANT_MOUNT 0x0100 + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; @@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry) return d_unhashed(dentry) && !IS_ROOT(dentry); } +static inline int cant_mount(struct dentry *dentry) +{ + return (dentry->d_flags & DCACHE_CANT_MOUNT); +} + +static inline void dont_mount(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_CANT_MOUNT; + spin_unlock(&dentry->d_lock); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; -- cgit v1.2.3 From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sun, 16 May 2010 01:05:45 -0700 Subject: rtnetlink: make SR-IOV VF interface symmetric Now we have a set of nested attributes: IFLA_VFINFO_LIST (NESTED) IFLA_VF_INFO (NESTED) IFLA_VF_MAC IFLA_VF_VLAN IFLA_VF_TX_RATE This allows a single set to operate on multiple attributes if desired. Among other things, it means a dump can be replayed to set state. The current interface has yet to be released, so this seems like something to consider for 2.6.34. Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- include/linux/if_link.h | 23 +++++-- net/core/rtnetlink.c | 159 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 129 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index c9bf92cd7653..d94963b379d9 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -79,10 +79,7 @@ enum { IFLA_NET_NS_PID, IFLA_IFALIAS, IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */ - IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, - IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ - IFLA_VFINFO, + IFLA_VFINFO_LIST, __IFLA_MAX }; @@ -203,6 +200,24 @@ enum macvlan_mode { /* SR-IOV virtual function managment section */ +enum { + IFLA_VF_INFO_UNSPEC, + IFLA_VF_INFO, + __IFLA_VF_INFO_MAX, +}; + +#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1) + +enum { + IFLA_VF_UNSPEC, + IFLA_VF_MAC, /* Hardware queue specific attributes */ + IFLA_VF_VLAN, + IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + __IFLA_VF_MAX, +}; + +#define IFLA_VF_MAX (__IFLA_VF_MAX - 1) + struct ifla_vf_mac { __u32 vf; __u8 mac[32]; /* MAX_ADDR_LEN */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddeca..31e85d327aa2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +/* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev) { - if (dev->dev.parent && dev_is_pci(dev->dev.parent)) - return dev_num_vf(dev->dev.parent) * - sizeof(struct ifla_vf_info); - else + if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { + + int num_vfs = dev_num_vf(dev->dev.parent); + size_t size = nlmsg_total_size(sizeof(struct nlattr)); + size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); + size += num_vfs * (sizeof(struct ifla_vf_mac) + + sizeof(struct ifla_vf_vlan) + + sizeof(struct ifla_vf_tx_rate)); + return size; + } else return 0; } @@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_NUM_VF */ - + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ + + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ } @@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { int i; - struct ifla_vf_info ivi; - NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); - for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { + struct nlattr *vfinfo, *vf; + int num_vfs = dev_num_vf(dev->dev.parent); + + NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + goto nla_put_failure; + for (i = 0; i < num_vfs; i++) { + struct ifla_vf_info ivi; + struct ifla_vf_mac vf_mac; + struct ifla_vf_vlan vf_vlan; + struct ifla_vf_tx_rate vf_tx_rate; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); + vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); + vf_vlan.vlan = ivi.vlan; + vf_vlan.qos = ivi.qos; + vf_tx_rate.rate = ivi.tx_rate; + vf = nla_nest_start(skb, IFLA_VF_INFO); + if (!vf) { + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); + NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + nla_nest_end(skb, vf); } + nla_nest_end(skb, vfinfo); } if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) @@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, - [IFLA_VF_MAC] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_mac) }, - [IFLA_VF_VLAN] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_vlan) }, - [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, - .len = sizeof(struct ifla_vf_tx_rate) }, + [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { + [IFLA_VF_INFO] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { + [IFLA_VF_MAC] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_mac) }, + [IFLA_VF_VLAN] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, + .len = sizeof(struct ifla_vf_tx_rate) }, +}; + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; @@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return 0; } +static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) +{ + int rem, err = -EINVAL; + struct nlattr *vf; + const struct net_device_ops *ops = dev->netdev_ops; + + nla_for_each_nested(vf, attr, rem) { + switch (nla_type(vf)) { + case IFLA_VF_MAC: { + struct ifla_vf_mac *ivm; + ivm = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_mac) + err = ops->ndo_set_vf_mac(dev, ivm->vf, + ivm->mac); + break; + } + case IFLA_VF_VLAN: { + struct ifla_vf_vlan *ivv; + ivv = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_vlan) + err = ops->ndo_set_vf_vlan(dev, ivv->vf, + ivv->vlan, + ivv->qos); + break; + } + case IFLA_VF_TX_RATE: { + struct ifla_vf_tx_rate *ivt; + ivt = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_tx_rate) + err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, + ivt->rate); + break; + } + default: + err = -EINVAL; + break; + } + if (err) + break; + } + return err; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, write_unlock_bh(&dev_base_lock); } - if (tb[IFLA_VF_MAC]) { - struct ifla_vf_mac *ivm; - ivm = nla_data(tb[IFLA_VF_MAC]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_mac) - err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); - if (err < 0) - goto errout; - modified = 1; - } - - if (tb[IFLA_VF_VLAN]) { - struct ifla_vf_vlan *ivv; - ivv = nla_data(tb[IFLA_VF_VLAN]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_vlan) - err = ops->ndo_set_vf_vlan(dev, ivv->vf, - ivv->vlan, - ivv->qos); - if (err < 0) - goto errout; - modified = 1; - } - err = 0; - - if (tb[IFLA_VF_TX_RATE]) { - struct ifla_vf_tx_rate *ivt; - ivt = nla_data(tb[IFLA_VF_TX_RATE]); - err = -EOPNOTSUPP; - if (ops->ndo_set_vf_tx_rate) - err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); - if (err < 0) - goto errout; - modified = 1; + if (tb[IFLA_VFINFO_LIST]) { + struct nlattr *attr; + int rem; + nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { + if (nla_type(attr) != IFLA_VF_INFO) + goto errout; + err = do_setvfinfo(dev, attr); + if (err < 0) + goto errout; + modified = 1; + } } err = 0; -- cgit v1.2.3