From 81aa8efe0190cf5bf7eaafb57341cd7d0aea96cd Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 16:13:44 +0200
Subject: KVM: s390: add and extend interrupt information data structs

With the currently available struct kvm_s390_interrupt it is not possible to
inject every kind of interrupt as defined in the z/Architecture. Add
additional interruption parameters to the structures and move them to kvm.h.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/uapi/linux/kvm.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 932d7f2637d6..86faf47ae494 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -434,6 +434,69 @@ struct kvm_s390_interrupt {
 	__u64 parm64;
 };
 
+struct kvm_s390_io_info {	/* I/O interrupt payload (kvm_s390_irq.u.io) */
+	__u16 subchannel_id;
+	__u16 subchannel_nr;
+	__u32 io_int_parm;
+	__u32 io_int_word;	/* the ISC used for queue ordering is derived from it */
+};
+
+struct kvm_s390_ext_info {	/* external interrupt payload (kvm_s390_irq.u.ext) */
+	__u32 ext_params;
+	__u32 pad;		/* explicit padding in front of the 64-bit field */
+	__u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {	/* program interrupt payload (kvm_s390_irq.u.pgm) */
+	__u64 trans_exc_code;
+	__u64 mon_code;
+	__u64 per_address;
+	__u32 data_exc_code;
+	__u16 code;
+	__u16 mon_class_nr;
+	__u8 per_code;
+	__u8 per_atmid;
+	__u8 exc_access_id;
+	__u8 per_access_id;
+	__u8 op_access_id;
+	__u8 pad[3];	/* rounds the 37 bytes of fields above up to 40 */
+};
+
+struct kvm_s390_prefix_info {	/* payload of kvm_s390_irq.u.prefix */
+	__u32 address;
+};
+
+struct kvm_s390_extcall_info {	/* payload of kvm_s390_irq.u.extcall */
+	__u16 code;
+};
+
+struct kvm_s390_emerg_info {	/* payload of kvm_s390_irq.u.emerg */
+	__u16 code;
+};
+
+struct kvm_s390_mchk_info {	/* machine check payload (kvm_s390_irq.u.mchk) */
+	__u64 cr14;
+	__u64 mcic;
+	__u64 failing_storage_address;
+	__u32 ext_damage_code;
+	__u32 pad;	/* explicit padding after the 32-bit ext_damage_code */
+	__u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+	__u64 type;	/* interrupt type; tells which member of u is in use */
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_mchk_info mchk;
+		char reserved[64];	/* largest member: fixes the union at 64 bytes */
+	} u;
+};
+
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE		0x00000001
-- 
cgit v1.2.3


From c05c4186bbe4e99d64e8a36f7ca7f480da5d109f Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 16:13:45 +0200
Subject: KVM: s390: add floating irq controller

This patch adds a floating irq controller as a kvm_device.
It will be necessary for migration of floating interrupts as well
as for hardening the reset code by allowing user space to explicitly
remove all pending floating interrupts.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/devices/s390_flic.txt |  36 +++
 arch/s390/include/asm/kvm_host.h                |   1 +
 arch/s390/include/uapi/asm/kvm.h                |  14 ++
 arch/s390/kvm/interrupt.c                       | 304 ++++++++++++++++++++----
 arch/s390/kvm/kvm-s390.c                        |   1 +
 include/linux/kvm_host.h                        |   1 +
 include/uapi/linux/kvm.h                        |   1 +
 virt/kvm/kvm_main.c                             |   5 +
 8 files changed, 312 insertions(+), 51 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/s390_flic.txt

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
new file mode 100644
index 000000000000..6b557953066a
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -0,0 +1,36 @@
+FLIC (floating interrupt controller)
+====================================
+
+FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
+machine check interruptions. All interrupts are stored in a per-vm list of
+pending interrupts. FLIC performs operations on this list.
+
+Only one FLIC instance may be instantiated.
+
+FLIC provides support to
+- add interrupts (KVM_DEV_FLIC_ENQUEUE)
+- inspect currently pending interrupts (KVM_DEV_FLIC_GET_ALL_IRQS)
+- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+
+Groups:
+  KVM_DEV_FLIC_ENQUEUE
+    Passes a buffer and length into the kernel which are then injected into
+    the list of pending interrupts.
+    attr->addr contains the pointer to the buffer and attr->attr contains
+    the length of the buffer.
+    The format of the data structure kvm_s390_irq as it is copied from userspace
+    is defined in include/uapi/linux/kvm.h.
+
+  KVM_DEV_FLIC_GET_ALL_IRQS
+    Copies all floating interrupts into a buffer provided by userspace.
+    When the buffer is too small it returns -ENOMEM, which is the indication
+    for userspace to try again with a bigger buffer.
+    All interrupts remain pending, i.e. are not deleted from the list of
+    currently pending interrupts.
+    attr->addr contains the userspace address of the buffer into which all
+    interrupt data will be copied.
+    attr->attr contains the size of the buffer in bytes.
+
+  KVM_DEV_FLIC_CLEAR_IRQS
+    Simply deletes all elements from the list of currently pending floating
+    interrupts.  No interrupts are injected into the guest.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3ffc9646e742..59635b5c59a6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -243,6 +243,7 @@ struct kvm_arch{
 	struct sca_block *sca;
 	debug_info_t *dbf;
 	struct kvm_s390_float_interrupt float_int;
+	struct kvm_device *flic;
 	struct gmap *gmap;
 	int css_support;
 };
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da598ec62..38d5f98552bb 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -16,6 +16,20 @@
 
 #define __KVM_S390
 
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS	1
+#define KVM_DEV_FLIC_ENQUEUE		2
+#define KVM_DEV_FLIC_CLEAR_IRQS		3
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Let's round up to 8192 pages.
+ */
+
+#define KVM_S390_FLIC_MAX_BUFFER	0x2000000
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* general purpose regs for s390 */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5f79d2d79ca7..a5f18babed4c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -659,53 +659,86 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	return inti;
 }
 
-int kvm_s390_inject_vm(struct kvm *kvm,
-		       struct kvm_s390_interrupt *s390int)
+static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
 	struct kvm_s390_local_interrupt *li;
 	struct kvm_s390_float_interrupt *fi;
-	struct kvm_s390_interrupt_info *inti, *iter;
+	struct kvm_s390_interrupt_info *iter;
 	int sigcpu;
 
+	mutex_lock(&kvm->lock);	/* queue inti on the VM's floating list, then kick one vcpu */
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	if (!is_ioint(inti->type)) {
+		list_add_tail(&inti->list, &fi->list);	/* non-I/O: append */
+	} else {
+		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+
+		/* Keep I/O interrupts sorted in isc order. */
+		list_for_each_entry(iter, &fi->list, list) {
+			if (!is_ioint(iter->type))
+				continue;
+			if (int_word_to_isc_bits(iter->io.io_int_word)
+			    <= isc_bits)
+				continue;
+			break;
+		}
+		list_add_tail(&inti->list, &iter->list);	/* insert in front of iter */
+	}
+	atomic_set(&fi->active, 1);
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {	/* no bit set in idle_mask: pick round-robin */
+		do {
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (fi->local_int[sigcpu] == NULL);	/* NOTE(review): never ends if all entries are NULL */
+	}
+	li = fi->local_int[sigcpu];
+	spin_lock_bh(&li->lock);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(li->wq))
+		wake_up_interruptible(li->wq);
+	spin_unlock_bh(&li->lock);
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct kvm_s390_interrupt_info *inti;
+
 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
 	if (!inti)
 		return -ENOMEM;
 
-	switch (s390int->type) {
+	inti->type = s390int->type;
+	switch (inti->type) {
 	case KVM_S390_INT_VIRTIO:
 		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
 			 s390int->parm, s390int->parm64);
-		inti->type = s390int->type;
 		inti->ext.ext_params = s390int->parm;
 		inti->ext.ext_params2 = s390int->parm64;
 		break;
 	case KVM_S390_INT_SERVICE:
 		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
-		inti->type = s390int->type;
 		inti->ext.ext_params = s390int->parm;
 		break;
-	case KVM_S390_PROGRAM_INT:
-	case KVM_S390_SIGP_STOP:
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
-		kfree(inti);
-		return -EINVAL;
 	case KVM_S390_MCHK:
 		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
 			 s390int->parm64);
-		inti->type = s390int->type;
 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
 		inti->mchk.mcic = s390int->parm64;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		if (s390int->type & IOINT_AI_MASK)
+		if (inti->type & IOINT_AI_MASK)
 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
 		else
 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
 				 s390int->type & IOINT_CSSID_MASK,
 				 s390int->type & IOINT_SSID_MASK,
 				 s390int->type & IOINT_SCHID_MASK);
-		inti->type = s390int->type;
 		inti->io.subchannel_id = s390int->parm >> 16;
 		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
 		inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -718,42 +751,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
 				 2);
 
-	mutex_lock(&kvm->lock);
-	fi = &kvm->arch.float_int;
-	spin_lock(&fi->lock);
-	if (!is_ioint(inti->type))
-		list_add_tail(&inti->list, &fi->list);
-	else {
-		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
-
-		/* Keep I/O interrupts sorted in isc order. */
-		list_for_each_entry(iter, &fi->list, list) {
-			if (!is_ioint(iter->type))
-				continue;
-			if (int_word_to_isc_bits(iter->io.io_int_word)
-			    <= isc_bits)
-				continue;
-			break;
-		}
-		list_add_tail(&inti->list, &iter->list);
-	}
-	atomic_set(&fi->active, 1);
-	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
-	if (sigcpu == KVM_MAX_VCPUS) {
-		do {
-			sigcpu = fi->next_rr_cpu++;
-			if (sigcpu == KVM_MAX_VCPUS)
-				sigcpu = fi->next_rr_cpu = 0;
-		} while (fi->local_int[sigcpu] == NULL);
-	}
-	li = fi->local_int[sigcpu];
-	spin_lock_bh(&li->lock);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
-	if (waitqueue_active(li->wq))
-		wake_up_interruptible(li->wq);
-	spin_unlock_bh(&li->lock);
-	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
+	__inject_vm(kvm, inti);
 	return 0;
 }
 
@@ -841,3 +839,207 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 	mutex_unlock(&vcpu->kvm->lock);
 	return 0;
 }
+
+static void clear_floating_interrupts(struct kvm *kvm)
+{	/* Drop every pending floating interrupt of this VM without delivering it. */
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_interrupt_info	*n, *inti = NULL;
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+	list_for_each_entry_safe(inti, n, &fi->list, list) {	/* _safe: entries are freed */
+		list_del(&inti->list);
+		kfree(inti);
+	}
+	atomic_set(&fi->active, 0);	/* the list is empty now */
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+}
+
+static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
+				   u8 *addr)
+{	/* Export one pending irq as a struct kvm_s390_irq at user address addr. */
+	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+	struct kvm_s390_irq irq = {0};	/* only type and the matching member are set below */
+
+	irq.type = inti->type;
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		irq.u.ext = inti->ext;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq.u.io = inti->io;
+		break;
+	case KVM_S390_MCHK:
+		irq.u.mchk = inti->mchk;
+		break;
+	default:
+		return -EINVAL;	/* any other type is not exported */
+	}
+
+	if (copy_to_user(uptr, &irq, sizeof(irq)))
+		return -EFAULT;
+
+	return 0;	/* 0 on success, -EINVAL or -EFAULT otherwise */
+}
+
+static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+{	/* Copy all pending floating irqs to user buffer buf (len bytes); they stay queued. */
+	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_float_interrupt *fi;
+	int ret = 0;
+	int n = 0;	/* number of irqs copied so far */
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock(&fi->lock);
+
+	list_for_each_entry(inti, &fi->list, list) {
+		if (len < sizeof(struct kvm_s390_irq)) {
+			/* signal userspace to try again */
+			ret = -ENOMEM;
+			break;
+		}
+		ret = copy_irq_to_user(inti, buf);	/* NOTE(review): copy_to_user() with spinlock fi->lock held - may it fault/sleep? */
+		if (ret)
+			break;
+		buf += sizeof(struct kvm_s390_irq);
+		len -= sizeof(struct kvm_s390_irq);
+		n++;
+	}
+
+	spin_unlock(&fi->lock);
+	mutex_unlock(&kvm->lock);
+
+	return ret < 0 ? ret : n;	/* count of copied irqs, or a negative errno */
+}
+
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{	/* get_attr callback of kvm_flic_ops; only KVM_DEV_FLIC_GET_ALL_IRQS is handled. */
+	int r;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_GET_ALL_IRQS:
+		r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+					  attr->attr);	/* addr: user buffer, attr: its size in bytes */
+		break;
+	default:
+		r = -EINVAL;	/* unknown attribute group */
+	}
+
+	return r;
+}
+
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+				     u64 addr)
+{	/* Fill inti from the struct kvm_s390_irq found at user address addr. */
+	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
+	void *target = NULL;
+	void __user *source;
+	u64 size;
+
+	if (get_user(inti->type, (u64 __user *)addr))	/* type is the first field of kvm_s390_irq */
+		return -EFAULT;
+
+	switch (inti->type) {
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		target = (void *) &inti->ext;
+		source = &uptr->u.ext;
+		size = sizeof(inti->ext);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		target = (void *) &inti->io;
+		source = &uptr->u.io;
+		size = sizeof(inti->io);
+		break;
+	case KVM_S390_MCHK:
+		target = (void *) &inti->mchk;
+		source = &uptr->u.mchk;
+		size = sizeof(inti->mchk);
+		break;
+	default:
+		return -EINVAL;	/* type not accepted on this path */
+	}
+
+	if (copy_from_user(target, source, size))	/* only the member selected above */
+		return -EFAULT;
+
+	return 0;	/* 0 on success, -EINVAL or -EFAULT otherwise */
+}
+
+static int enqueue_floating_irq(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{	/* Inject the attr->attr bytes of struct kvm_s390_irq found at attr->addr. */
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int r = 0;
+	u64 len = attr->attr;	/* full width: an int would truncate or sign-flip it */
+
+	if (len % sizeof(struct kvm_s390_irq) != 0)
+		return -EINVAL;
+	else if (len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	while (len >= sizeof(struct kvm_s390_irq)) {
+		inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+		if (!inti)
+			return -ENOMEM;	/* irqs injected so far stay queued */
+
+		r = copy_irq_from_user(inti, attr->addr);
+		if (r) {
+			kfree(inti);
+			return r;	/* irqs injected so far stay queued */
+		}
+		__inject_vm(dev->kvm, inti);	/* the pending list owns inti from here on */
+		len -= sizeof(struct kvm_s390_irq);
+		attr->addr += sizeof(struct kvm_s390_irq);	/* advance to the next irq */
+	}
+
+	return r;	/* 0 when every irq was queued (also for len == 0) */
+}
+
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{	/* set_attr callback of kvm_flic_ops: enqueue or clear floating interrupts. */
+	int r = 0;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_ENQUEUE:
+		r = enqueue_floating_irq(dev, attr);
+		break;
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+		r = 0;
+		clear_floating_interrupts(dev->kvm);
+		break;
+	default:
+		r = -EINVAL;	/* unknown attribute group */
+	}
+
+	return r;
+}
+
+static int flic_create(struct kvm_device *dev, u32 type)
+{	/* create callback of kvm_flic_ops: register dev as the VM's FLIC; type is unused. */
+	if (!dev)
+		return -EINVAL;
+	if (dev->kvm->arch.flic)	/* only one FLIC instance per VM */
+		return -EINVAL;
+	dev->kvm->arch.flic = dev;
+	return 0;
+}
+
+static void flic_destroy(struct kvm_device *dev)
+{	/* destroy callback of kvm_flic_ops: unregister the VM's FLIC and free dev. */
+	dev->kvm->arch.flic = NULL;
+	kfree(dev);
+}
+
+/* s390 floating irq controller (flic): device ops selected by KVM_DEV_TYPE_FLIC */
+struct kvm_device_ops kvm_flic_ops = {
+	.name = "kvm-flic",
+	.get_attr = flic_get_attr,
+	.set_attr = flic_set_attr,
+	.create = flic_create,
+	.destroy = flic_destroy,
+};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e0676f390d57..782420f3c4d5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -157,6 +157,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_S390_CSS_SUPPORT:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b8e9a43e501a..c0102ef2de48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1064,6 +1064,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
+extern struct kvm_device_ops kvm_flic_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 86faf47ae494..19f717b15297 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -918,6 +918,7 @@ struct kvm_device_attr {
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
 #define KVM_DEV_TYPE_ARM_VGIC_V2	5
+#define KVM_DEV_TYPE_FLIC		6
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381b1cb7..a9e999a48e43 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2283,6 +2283,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_ARM_VGIC_V2:
 		ops = &kvm_arm_vgic_v2_ops;
 		break;
+#endif
+#ifdef CONFIG_S390
+	case KVM_DEV_TYPE_FLIC:
+		ops = &kvm_flic_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
-- 
cgit v1.2.3


From 3c038e6be0e299d4d3762d0a9a29f02de6e04991 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 17:11:48 +0200
Subject: KVM: async_pf: Async page fault support on s390

This patch enables async page faults for s390 kvm guests.
It provides the userspace API to enable this feature and to disable it (disable_wait).
disable_wait enforces that the feature is off by waiting for pending async page faults to complete.
It also includes the diagnose code, called by the guest to enable async page faults.

The async page faults will use an already existing guest interface for this
purpose, as described in "CP Programming Services (SC24-6084)".

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/devices/s390_flic.txt | 10 +++
 arch/s390/include/asm/kvm_host.h                | 22 ++++++
 arch/s390/include/uapi/asm/kvm.h                |  2 +
 arch/s390/kvm/Kconfig                           |  2 +
 arch/s390/kvm/Makefile                          |  2 +-
 arch/s390/kvm/diag.c                            | 84 +++++++++++++++++++++++
 arch/s390/kvm/interrupt.c                       | 65 ++++++++++++++++--
 arch/s390/kvm/kvm-s390.c                        | 89 ++++++++++++++++++++++++-
 arch/s390/kvm/kvm-s390.h                        |  4 ++
 arch/s390/kvm/sigp.c                            |  7 ++
 arch/s390/kvm/trace.h                           | 46 +++++++++++++
 include/uapi/linux/kvm.h                        |  2 +
 12 files changed, 326 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index 6b557953066a..410fa673e5b6 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -11,6 +11,7 @@ FLIC provides support to
 - add interrupts (KVM_DEV_FLIC_ENQUEUE)
 - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
 - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- enable/disable async page faults that are transparent to the guest
 
 Groups:
   KVM_DEV_FLIC_ENQUEUE
@@ -34,3 +35,12 @@ Groups:
   KVM_DEV_FLIC_CLEAR_IRQS
     Simply deletes all elements from the list of currently pending floating
     interrupts.  No interrupts are injected into the guest.
+
+  KVM_DEV_FLIC_APF_ENABLE
+    Enables async page faults for the guest. In case of a major page fault,
+    the host is allowed to handle it asynchronously and continue the guest.
+
+  KVM_DEV_FLIC_APF_DISABLE_WAIT
+    Disables async page faults for the guest and waits until already pending
+    async page faults are done. This is necessary to trigger a completion interrupt
+    for every init interrupt before migrating the interrupt list.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c3c5e1028136..2c69ba285e81 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -231,6 +231,10 @@ struct kvm_vcpu_arch {
 		u64		stidp_data;
 	};
 	struct gmap *gmap;
+#define KVM_S390_PFAULT_TOKEN_INVALID	(-1UL)
+	unsigned long pfault_token;
+	unsigned long pfault_select;
+	unsigned long pfault_compare;
 };
 
 struct kvm_vm_stat {
@@ -257,6 +261,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 	return IS_ERR_VALUE(addr);
 }
 
+#define ASYNC_PF_PER_VCPU	64
+struct kvm_vcpu;
+struct kvm_async_pf;
+struct kvm_arch_async_pf {	/* presumably the arch member of struct kvm_async_pf - confirm */
+	unsigned long pfault_token;	/* token passed along with the pfault init/done interrupts */
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work);
+
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 #endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 058b178391d4..ccfd0b1d056d 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -20,6 +20,8 @@
 #define KVM_DEV_FLIC_GET_ALL_IRQS	1
 #define KVM_DEV_FLIC_ENQUEUE		2
 #define KVM_DEV_FLIC_CLEAR_IRQS		3
+#define KVM_DEV_FLIC_APF_ENABLE		4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
 /*
  * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
  * as well as up  to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 70b46eacf8e1..c8bacbcd2e5b 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,8 @@ config KVM
 	select ANON_INODES
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_EVENTFD
+	select KVM_ASYNC_PF
+	select KVM_ASYNC_PF_SYNC
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 40b4c6470f88..a47d2c355f68 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 8216c0e0b2e2..bf9ed34c2bcd 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -17,6 +17,7 @@
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
+#include "gaccess.h"
 
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
@@ -46,6 +47,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{	/* Handle diagnose 0x258: set up (subcode 0) or cancel (subcode 1) pfault handling. */
+	struct prs_parm {
+		u16 code;
+		u16 subcode;
+		u16 parm_len;
+		u16 parm_version;
+		u64 token_addr;
+		u64 select_mask;
+		u64 compare_mask;
+		u64 zarch;
+	};
+	struct prs_parm parm;
+	int rc;
+	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;	/* gpr with the guest address of parm */
+	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);	/* gpr that receives the return code */
+	unsigned long hva_token = KVM_HVA_ERR_BAD;	/* only used to validate token_addr */
+
+	if (vcpu->run->s.regs.gprs[rx] & 7)	/* parameter block must be 8-byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (parm.subcode) {
+	case 0: /* TOKEN */
+		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+			/*
+			 * If the pagefault handshake is already activated,
+			 * the token must not be changed.  We have to return
+			 * decimal 8 instead, as mandated in SC24-6084.
+			 */
+			vcpu->run->s.regs.gprs[ry] = 8;
+			return 0;
+		}
+
+		if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+		if (kvm_is_error_hva(hva_token))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		vcpu->arch.pfault_token = parm.token_addr;	/* handshake is active from here on */
+		vcpu->arch.pfault_select = parm.select_mask;
+		vcpu->arch.pfault_compare = parm.compare_mask;
+		vcpu->run->s.regs.gprs[ry] = 0;
+		rc = 0;
+		break;
+	case 1: /*
+		 * CANCEL
+		 * The specification allows already pending tokens to survive
+		 * the cancel. To reduce code complexity, we therefore assume
+		 * that all outstanding tokens are already pending.
+		 */
+		if (parm.token_addr || parm.select_mask ||
+		    parm.compare_mask || parm.zarch)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		vcpu->run->s.regs.gprs[ry] = 0;
+		/*
+		 * If the pfault handling was not established or is already
+		 * canceled SC24-6084 requests to return decimal 4.
+		 */
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			vcpu->run->s.regs.gprs[ry] = 4;
+		else
+			vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+		rc = 0;
+		break;
+	default:
+		rc = -EOPNOTSUPP;	/* any other subcode */
+		break;
+	}
+
+	return rc;
+}
+
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -150,6 +232,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 		return __diag_time_slice_end(vcpu);
 	case 0x9c:
 		return __diag_time_slice_end_directed(vcpu);
+	case 0x258:
+		return __diag_page_ref_service(vcpu);
 	case 0x308:
 		return __diag_ipl_functions(vcpu);
 	case 0x500:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9c9192b5e339..1848080c3f34 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -31,7 +31,7 @@ static int is_ioint(u64 type)
 	return ((type & 0xfffe0000u) != 0xfffe0000u);
 }
 
-static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
 }
@@ -78,11 +78,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 			return 1;
 		return 0;
 	case KVM_S390_INT_SERVICE:
-		if (psw_extint_disabled(vcpu))
-			return 0;
-		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-			return 1;
-		return 0;
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 		if (psw_extint_disabled(vcpu))
 			return 0;
@@ -150,6 +147,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 	case KVM_S390_INT_EXTERNAL_CALL:
 	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -223,6 +222,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 		rc |= put_guest(vcpu, inti->ext.ext_params,
 				(u32 __user *)__LC_EXT_PARAMS);
 		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+						 inti->ext.ext_params2);
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *) __LC_EXT_PARAMS2);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
+						 inti->ext.ext_params2);
+		rc  = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
+		rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
+		rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+				      __LC_EXT_NEW_PSW, sizeof(psw_t));
+		rc |= put_guest(vcpu, inti->ext.ext_params2,
+				(u64 __user *) __LC_EXT_PARAMS2);
+		break;
 	case KVM_S390_INT_VIRTIO:
 		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
@@ -357,7 +380,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -737,6 +760,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
 		inti->ext.ext_params = s390int->parm;
 		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		inti->type = s390int->type;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
 	case KVM_S390_MCHK:
 		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
 			 s390int->parm64);
@@ -823,6 +850,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		inti->type = s390int->type;
 		inti->mchk.mcic = s390int->parm64;
 		break;
+	case KVM_S390_INT_PFAULT_INIT:
+		inti->type = s390int->type;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -877,6 +908,8 @@ static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
 
 	irq.type = inti->type;
 	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 		irq.u.ext = inti->ext;
@@ -956,6 +989,8 @@ static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
 		return -EFAULT;
 
 	switch (inti->type) {
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 		target = (void *) &inti->ext;
@@ -1019,6 +1054,8 @@ static int enqueue_floating_irq(struct kvm_device *dev,
 static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r = 0;
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
 
 	switch (attr->group) {
 	case KVM_DEV_FLIC_ENQUEUE:
@@ -1028,6 +1065,20 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = 0;
 		clear_floating_interrupts(dev->kvm);
 		break;
+	case KVM_DEV_FLIC_APF_ENABLE:
+		dev->kvm->arch.gmap->pfault_enabled = 1;
+		break;
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		dev->kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues. So we don't need to worry
+		 * about late coming workers.
+		 */
+		synchronize_srcu(&dev->kvm->srcu);
+		kvm_for_each_vcpu(i, vcpu, dev->kvm)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
 	default:
 		r = -EINVAL;
 	}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9eec794caa7f..d8e9f04977db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -152,6 +152,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_KVM_S390_UCONTROL
 	case KVM_CAP_S390_UCONTROL:
 #endif
+	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_SYNC_REGS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ENABLE_CAP:
@@ -273,6 +274,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_clear_async_pf_completion_queue(vcpu);
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		clear_bit(63 - vcpu->vcpu_id,
 			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -322,6 +324,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 /* Section: vcpu related */
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
 	if (kvm_is_ucontrol(vcpu->kvm)) {
 		vcpu->arch.gmap = gmap_alloc(current->mm);
 		if (!vcpu->arch.gmap)
@@ -382,6 +386,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_fpregs.fpc = 0;
 	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
 	vcpu->arch.sie_block->gbea = 1;
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 }
 
@@ -713,10 +719,89 @@ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
 	return rc;
 }
 
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+				      unsigned long token)
+{
+	struct kvm_s390_interrupt inti;
+	inti.parm64 = token;
+
+	if (start_token) {
+		inti.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+	} else {
+		inti.type = KVM_S390_INT_PFAULT_DONE;
+		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+	}
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work)
+{
+	/* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * s390 will always inject the page directly,
+	 * but we still want kvm_check_async_pf_completion() to clean up
+	 */
+	return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+	hva_t hva;
+	struct kvm_arch_async_pf arch;
+	int rc;
+
+	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+		return 0;
+	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+	    vcpu->arch.pfault_compare)
+		return 0;
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (kvm_cpu_has_interrupt(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		return 0;
+	if (!vcpu->arch.gmap->pfault_enabled)
+		return 0;
+
+	hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+	if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+		return 0;
+
+	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+	return rc;
+}
+
 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 {
 	int rc, cpuflags;
 
+	/*
+	 * On s390 notifications for arriving pages will be delivered directly
+	 * to the guest but the housekeeping for completed pfaults is
+	 * handled outside the worker.
+	 */
+	kvm_check_async_pf_completion(vcpu);
+
 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
 
 	if (need_resched())
@@ -758,8 +843,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 		rc = -EREMOTE;
 
 	} else if (current->thread.gmap_pfault) {
+		trace_kvm_s390_major_guest_pfault(vcpu);
 		current->thread.gmap_pfault = 0;
-		if (kvm_arch_fault_in_sync(vcpu) >= 0)
+		if (kvm_arch_setup_async_pf(vcpu) ||
+		    (kvm_arch_fault_in_sync(vcpu) >= 0))
 			rc = 0;
 	}
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f9559b0bd620..ed4750a5bc3c 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -159,4 +159,8 @@ void exit_sie_sync(struct kvm_vcpu *vcpu);
 /* implemented in diag.c */
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
+/* implemented in interrupt.c */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 87c2b3a3bd3e..fe9442d39f0e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -224,6 +224,8 @@ unlock:
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 {
 	int rc;
+	unsigned int i;
+	struct kvm_vcpu *v;
 
 	switch (parameter & 0xff) {
 	case 0:
@@ -231,6 +233,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 		break;
 	case 1:
 	case 2:
+		kvm_for_each_vcpu(i, v, vcpu->kvm) {
+			v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+			kvm_clear_async_pf_completion_queue(v);
+		}
+
 		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 		break;
 	default:
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 3db76b2daed7..e8e7213d4cc5 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -30,6 +30,52 @@
 	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
 		  __entry->pswmask, __entry->pswaddr, p_args)
 
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+	);
+
+TRACE_EVENT(kvm_s390_pfault_init,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+	);
+
+TRACE_EVENT(kvm_s390_pfault_done,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+	);
+
 /*
  * Tracepoints for SIE entry and exit.
  */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 19f717b15297..7d76401d2bb5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -413,6 +413,8 @@ struct kvm_s390_psw {
 #define KVM_S390_PROGRAM_INT		0xfffe0001u
 #define KVM_S390_SIGP_SET_PREFIX	0xfffe0002u
 #define KVM_S390_RESTART		0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT	0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE	0xfffe0005u
 #define KVM_S390_MCHK			0xfffe1000u
 #define KVM_S390_INT_VIRTIO		0xffff2603u
 #define KVM_S390_INT_SERVICE		0xffff2401u
-- 
cgit v1.2.3


From 16f6b87ac524f073c93ea3caadfd5111d03ecb4f Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Sun, 12 Jan 2014 20:02:20 +0100
Subject: can: add explicit copyrights to can userspace header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is in the spirit of commit 2485602f1af2 (can: add explicit
copyrights to can headers). It seems I have missed can.h back then.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index e52958d7c2d1..5d9d1d140718 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -8,6 +8,38 @@
  * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
  * All rights reserved.
  *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
  */
 
 #ifndef CAN_H
-- 
cgit v1.2.3


From a1d16e0f59506d07935ec0a929a2c9f1d6d96077 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 31 Jan 2014 10:32:15 -0300
Subject: [media] v4l2-dv-timings.h: add new 4K DMT resolutions

VESA added two new DMT timings in their latest standard document. Add these
to v4l2-dv-timings.h.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index be709fe29552..b6a5fe00a470 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -823,4 +823,21 @@
 		V4L2_DV_FL_REDUCED_BLANKING) \
 }
 
+/* 4K resolutions */
+#define V4L2_DV_BT_DMT_4096X2160P60_RB { \
+	.type = V4L2_DV_BT_656_1120, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+		556744000, 8, 32, 40, 48, 8, 6, 0, 0, 0, \
+		V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CVT, \
+		V4L2_DV_FL_REDUCED_BLANKING) \
+}
+
+#define V4L2_DV_BT_DMT_4096X2160P59_94_RB { \
+	.type = V4L2_DV_BT_656_1120, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+		556188000, 8, 32, 40, 48, 8, 6, 0, 0, 0, \
+		V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CVT, \
+		V4L2_DV_FL_REDUCED_BLANKING) \
+}
+
 #endif
-- 
cgit v1.2.3


From b2c8b3ea871e478ac144f617d015d3aa55fc3aa8 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 4 Feb 2014 15:45:11 +0000
Subject: GFS2: Allocate block for xattr at inode alloc time, if required

This is another step towards improving the allocation of xattr
blocks at inode allocation time. Here we take advantage of
Christoph's recent work on ACLs to allocate a block for the
xattrs early if we know that we will be adding ACLs to the
inode later on. The advantage of that is that it is much
more likely that we'll get a contiguous run of two blocks
where the first is the inode and the second is the xattr block.

We still have to fall back to the original system in case we
don't get the requested two contiguous blocks, or in case the
ACLs are too large to fit into the block.

Future patches will move more of the ACL setting code further
up the gfs2_inode_create() function. Also, I'd like to be
able to do the same thing with the xattrs from LSMs in
due course, too. That way we should be able to slowly reduce
the number of independent transactions, at least in the
most common cases.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c                  | 50 ++++++++++++++++++++++++++++++++++------
 fs/gfs2/rgrp.c                   |  2 +-
 include/uapi/linux/gfs2_ondisk.h |  4 ++--
 3 files changed, 46 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 5c524180c98e..ec455b92091f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -376,12 +376,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
 		inode->i_gid = current_fsgid();
 }
 
-static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
+static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, };
+	struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
 	int error;
-	int dblocks = 1;
 
 	error = gfs2_quota_lock_check(ip);
 	if (error)
@@ -391,11 +390,11 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
 	if (error)
 		goto out_quota;
 
-	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 0);
+	error = gfs2_trans_begin(sdp, (*dblocks * RES_RG_BIT) + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipreserv;
 
-	error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation);
+	error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation);
 	ip->i_no_formal_ino = ip->i_generation;
 	ip->i_inode.i_ino = ip->i_no_addr;
 	ip->i_goal = ip->i_no_addr;
@@ -427,6 +426,33 @@ static void gfs2_init_dir(struct buffer_head *dibh,
 	
 }
 
+/**
+ * gfs2_init_xattr - Initialise an xattr block for a new inode
+ * @ip: The inode in question
+ *
+ * This sets up an empty xattr block for a new inode, ready to
+ * take any ACLs, LSM xattrs, etc.
+ */
+
+static void gfs2_init_xattr(struct gfs2_inode *ip)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct buffer_head *bh;
+	struct gfs2_ea_header *ea;
+
+	bh = gfs2_meta_new(ip->i_gl, ip->i_eattr);
+	gfs2_trans_add_meta(ip->i_gl, bh);
+	gfs2_metatype_set(bh, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
+	gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
+
+	ea = GFS2_EA_BH2FIRST(bh);
+	ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
+	ea->ea_type = GFS2_EATYPE_UNUSED;
+	ea->ea_flags = GFS2_EAFLAG_LAST;
+
+	brelse(bh);
+}
+
 /**
  * init_dinode - Fill in a new dinode structure
  * @dip: The directory this inode is being created in
@@ -580,6 +606,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	struct dentry *d;
 	int error;
 	u32 aflags = 0;
+	unsigned blocks = 1;
 	struct gfs2_diradd da = { .bh = NULL, };
 
 	if (!name->len || name->len > GFS2_FNAMESIZE)
@@ -676,10 +703,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	    (dip->i_diskflags & GFS2_DIF_TOPDIR))
 		aflags |= GFS2_AF_ORLOV;
 
-	error = alloc_dinode(ip, aflags);
+	if (default_acl || acl)
+		blocks++;
+
+	error = alloc_dinode(ip, aflags, &blocks);
 	if (error)
 		goto fail_free_inode;
 
+	gfs2_set_inode_blocks(inode, blocks);
+
 	error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
 	if (error)
 		goto fail_free_inode;
@@ -689,10 +721,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_free_inode;
 
-	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
+	error = gfs2_trans_begin(sdp, blocks, 0);
 	if (error)
 		goto fail_gunlock2;
 
+	if (blocks > 1) {
+		ip->i_eattr = ip->i_no_addr + 1;
+		gfs2_init_xattr(ip);
+	}
 	init_dinode(dip, ip, symname);
 	gfs2_trans_end(sdp);
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index a1da21349235..c13e4c5e9967 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2296,7 +2296,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
-		gfs2_trans_add_unrevoke(sdp, block, 1);
+		gfs2_trans_add_unrevoke(sdp, block, *nblocks);
 
 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h
index 0f24c07aed51..310020816809 100644
--- a/include/uapi/linux/gfs2_ondisk.h
+++ b/include/uapi/linux/gfs2_ondisk.h
@@ -347,9 +347,9 @@ struct gfs2_leaf {
  * metadata header. Each inode, if it has extended attributes, will
  * have either a single block containing the extended attribute headers
  * or a single indirect block pointing to blocks containing the
- * extended attribure headers.
+ * extended attribute headers.
  *
- * The maximim size of the data part of an extended attribute is 64k
+ * The maximum size of the data part of an extended attribute is 64k
  * so the number of blocks required depends upon block size. Since the
  * block size also determines the number of pointers in an indirect
  * block, its a fairly complicated calculation to work out the maximum
-- 
cgit v1.2.3


From 1df4a51082df6e5b0b8eb70df81885b9b4c9e6ec Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Wed, 15 Jan 2014 00:00:47 +0200
Subject: cfg80211: Allow BSS hint to be provided for connect

This clarifies the expected driver behavior on the older
NL80211_ATTR_MAC and NL80211_ATTR_WIPHY_FREQ attributes and adds a new
set of similar attributes with _HINT postfix to enable use of a
recommendation of the initial BSS to choose. This can be helpful for
some drivers that can avoid an additional full scan on connection
request if the information is provided to them (user space tools like
wpa_supplicant already have that information available based on earlier
scans).

In addition, this can be used to get more expected behavior for cases
where a specific BSS should be picked first based on operations like
Interworking network selection or WPS. These cases were already easily
addressed with drivers that leave BSS selection to user space, but there
was no convenient way to do this with drivers that take care of BSS
selection internally without using the NL80211_ATTR_MAC which is not
really desired since it is needed for other purposes to force the
association to remain with the same BSS.

Signed-off-by: Jouni Malinen <j@w1.fi>
[add const, fix policy]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  8 ++++++++
 include/uapi/linux/nl80211.h | 20 ++++++++++++++++++--
 net/wireless/nl80211.c       | 13 +++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b1f84b05c67e..572005981366 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1701,8 +1701,14 @@ struct cfg80211_ibss_params {
  *
  * @channel: The channel to use or %NULL if not specified (auto-select based
  *	on scan results)
+ * @channel_hint: The channel of the recommended BSS for initial connection or
+ *	%NULL if not specified
  * @bssid: The AP BSSID or %NULL if not specified (auto-select based on scan
  *	results)
+ * @bssid_hint: The recommended AP BSSID for initial connection to the BSS or
+ *	%NULL if not specified. Unlike the @bssid parameter, the driver is
+ *	allowed to ignore this @bssid_hint if it has knowledge of a better BSS
+ *	to use.
  * @ssid: SSID
  * @ssid_len: Length of ssid in octets
  * @auth_type: Authentication type (algorithm)
@@ -1725,7 +1731,9 @@ struct cfg80211_ibss_params {
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
+	struct ieee80211_channel *channel_hint;
 	u8 *bssid;
+	const u8 *bssid_hint;
 	u8 *ssid;
 	size_t ssid_len;
 	enum nl80211_auth_type auth_type;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 91054fd660e0..e57de3318068 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -418,8 +418,18 @@
  *	%NL80211_ATTR_SSID attribute, and can optionally specify the association
  *	IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
  *	%NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
- *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE and
- *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT.
+ *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
+ *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, %NL80211_ATTR_MAC_HINT, and
+ *	%NL80211_ATTR_WIPHY_FREQ_HINT.
+ *	If included, %NL80211_ATTR_MAC and %NL80211_ATTR_WIPHY_FREQ are
+ *	restrictions on BSS selection, i.e., they effectively prevent roaming
+ *	within the ESS. %NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT
+ *	can be included to provide a recommendation of the initial BSS while
+ *	allowing the driver to roam to other BSSes within the ESS and also to
+ *	ignore this recommendation if the indicated BSS is not ideal. Only one
+ *	set of BSSID,frequency parameters is used (i.e., either the enforcing
+ *	%NL80211_ATTR_MAC,%NL80211_ATTR_WIPHY_FREQ or the less strict
+ *	%NL80211_ATTR_MAC_HINT and %NL80211_ATTR_WIPHY_FREQ_HINT).
  *	Background scan period can optionally be
  *	specified in %NL80211_ATTR_BG_SCAN_PERIOD,
  *	if not specified default background scan configuration
@@ -1555,6 +1565,9 @@ enum nl80211_commands {
  *	data is in the format defined for the payload of the QoS Map Set element
  *	in IEEE Std 802.11-2012, 8.4.2.97.
  *
+ * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS
+ * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1883,6 +1896,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_QOS_MAP,
 
+	NL80211_ATTR_MAC_HINT,
+	NL80211_ATTR_WIPHY_FREQ_HINT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7a742594916e..6e7d580ec645 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -382,6 +382,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
 	[NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
 				   .len = IEEE80211_QOS_MAP_LEN_MAX },
+	[NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
+	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -6984,6 +6986,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
 	if (info->attrs[NL80211_ATTR_MAC])
 		connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+	else if (info->attrs[NL80211_ATTR_MAC_HINT])
+		connect.bssid_hint =
+			nla_data(info->attrs[NL80211_ATTR_MAC_HINT]);
 	connect.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
 	connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
 
@@ -7008,6 +7013,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		if (!connect.channel ||
 		    connect.channel->flags & IEEE80211_CHAN_DISABLED)
 			return -EINVAL;
+	} else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
+		connect.channel_hint =
+			ieee80211_get_channel(wiphy,
+			    nla_get_u32(
+				    info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]));
+		if (!connect.channel_hint ||
+		    connect.channel_hint->flags & IEEE80211_CHAN_DISABLED)
+			return -EINVAL;
 	}
 
 	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
-- 
cgit v1.2.3


From b43504cf75b8b8773ee70c90bcd691282e151b9a Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Wed, 15 Jan 2014 00:01:08 +0200
Subject: cfg80211: Advertise maximum associated STAs in AP mode

This allows drivers to advertise the maximum number of associated
stations they support in AP mode (including P2P GO). User space
applications can use this for a cleaner way of handling the limit (e.g.,
hostapd rejecting IEEE 802.11 authentication without manual
configuration of the limit) or to figure out what type of use cases can
be executed with multiple devices before trying and failing.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 7 +++++++
 include/uapi/linux/nl80211.h | 9 +++++++++
 net/wireless/nl80211.c       | 6 ++++++
 3 files changed, 22 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 572005981366..117bea0210be 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2883,6 +2883,11 @@ struct wiphy_vendor_command {
  * @n_vendor_commands: number of vendor commands
  * @vendor_events: array of vendor events supported by the hardware
  * @n_vendor_events: number of vendor events
+ *
+ * @max_ap_assoc_sta: maximum number of associated stations supported in AP mode
+ *	(including P2P GO) or 0 to indicate no such limit is advertised. The
+ *	driver is allowed to advertise a theoretical limit that it can reach in
+ *	some cases, but may not always reach.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -2998,6 +3003,8 @@ struct wiphy {
 	const struct nl80211_vendor_cmd_info *vendor_events;
 	int n_vendor_commands, n_vendor_events;
 
+	u16 max_ap_assoc_sta;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e57de3318068..9a86c8bf6da6 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1568,6 +1568,13 @@ enum nl80211_commands {
  * @NL80211_ATTR_MAC_HINT: MAC address recommendation as initial BSS
  * @NL80211_ATTR_WIPHY_FREQ_HINT: frequency of the recommended initial BSS
  *
+ * @NL80211_ATTR_MAX_AP_ASSOC_STA: Device attribute that indicates how many
+ *	associated stations are supported in AP mode (including P2P GO); u32.
+ *	Since drivers may not have a fixed limit on the maximum number (e.g.,
+ *	other concurrent operations may affect this), drivers are allowed to
+ *	advertise values that cannot always be met. In such cases, an attempt
+ *	to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1899,6 +1906,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_MAC_HINT,
 	NL80211_ATTR_WIPHY_FREQ_HINT,
 
+	NL80211_ATTR_MAX_AP_ASSOC_STA,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6e7d580ec645..b2ac1410b113 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1588,6 +1588,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
 		    (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) ||
 		     nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ)))
 			goto nla_put_failure;
+
+		if (dev->wiphy.max_ap_assoc_sta &&
+		    nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA,
+				dev->wiphy.max_ap_assoc_sta))
+			goto nla_put_failure;
+
 		state->split_start++;
 		break;
 	case 11:
-- 
cgit v1.2.3


From 0b9323f600a3e80a488e3bd14ddfa85b294e630d Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Date: Wed, 8 Jan 2014 08:46:02 +0100
Subject: nl80211: add Guard Interval support for set_bitrate_mask

Allow forcing SGI or LGI.
Mainly for testing purposes.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 1 +
 include/uapi/linux/nl80211.h | 8 ++++++++
 net/wireless/nl80211.c       | 7 +++++++
 3 files changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d10ba3a1bfa8..d5e57bf678a6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1776,6 +1776,7 @@ struct cfg80211_bitrate_mask {
 		u32 legacy;
 		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
 		u16 vht_mcs[NL80211_VHT_NSS_MAX];
+		enum nl80211_txrate_gi gi;
 	} control[IEEE80211_NUM_BANDS];
 };
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9a86c8bf6da6..53e56cf7c0fe 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3156,6 +3156,7 @@ enum nl80211_key_attributes {
  *	in an array of MCS numbers.
  * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection,
  *	see &struct nl80211_txrate_vht
+ * @NL80211_TXRATE_GI: configure GI, see &enum nl80211_txrate_gi
  * @__NL80211_TXRATE_AFTER_LAST: internal
  * @NL80211_TXRATE_MAX: highest TX rate attribute
  */
@@ -3164,6 +3165,7 @@ enum nl80211_tx_rate_attributes {
 	NL80211_TXRATE_LEGACY,
 	NL80211_TXRATE_HT,
 	NL80211_TXRATE_VHT,
+	NL80211_TXRATE_GI,
 
 	/* keep last */
 	__NL80211_TXRATE_AFTER_LAST,
@@ -3181,6 +3183,12 @@ struct nl80211_txrate_vht {
 	__u16 mcs[NL80211_VHT_NSS_MAX];
 };
 
+enum nl80211_txrate_gi {
+	NL80211_TXRATE_DEFAULT_GI,
+	NL80211_TXRATE_FORCE_SGI,
+	NL80211_TXRATE_FORCE_LGI,
+};
+
 /**
  * enum nl80211_band - Frequency band
  * @NL80211_BAND_2GHZ: 2.4 GHz ISM band
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 09b6da8ffdfe..a3515ebbd32b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7447,6 +7447,7 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
 	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
 				.len = NL80211_MAX_SUPP_HT_RATES },
 	[NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
+	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
 };
 
 static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
@@ -7527,6 +7528,12 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 					mask.control[band].vht_mcs))
 				return -EINVAL;
 		}
+		if (tb[NL80211_TXRATE_GI]) {
+			mask.control[band].gi =
+				nla_get_u8(tb[NL80211_TXRATE_GI]);
+			if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI)
+				return -EINVAL;
+		}
 
 		if (mask.control[band].legacy == 0) {
 			/* don't allow empty legacy rates if HT or VHT
-- 
cgit v1.2.3


From ea73cbce4e1fd93113301532ad98041b119bc85a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Jan 2014 10:53:53 +0100
Subject: nl80211: fix scheduled scan RSSI matchset attribute confusion

The scheduled scan matchsets were intended to be a list of filters,
with the found BSS having to pass at least one of them to be passed
to the host. When the RSSI attribute was added, however, this was
broken and currently wpa_supplicant adds that attribute in its own
matchset; however, it doesn't intend that to mean that anything
that passes the RSSI filter should be passed to the host, instead
it wants it to mean that everything needs to also have higher RSSI.

This is semantically problematic because we have a list of filters
like [ SSID1, SSID2, SSID3, RSSI ] with no real indication which
one should be OR'ed and which one AND'ed.

To fix this, move the RSSI filter attribute into each matchset. As
we need to stay backward compatible, treat a matchset with only the
RSSI attribute as a "default RSSI filter" for all other matchsets,
but only if there are other matchsets (an RSSI-only matchset by
itself is still desirable.)

To make driver implementation easier, keep a global min_rssi_thold
for the entire request as well. The only affected driver is ath6kl.

I found this when I looked into the code after Raja Mani submitted
a patch fixing the n_match_sets calculation to disregard the RSSI,
but that patch didn't address the semantic issue.

Reported-by: Raja Mani <rmani@qti.qualcomm.com>
Acked-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c | 19 +++++---
 drivers/net/wireless/iwlwifi/mvm/scan.c    |  3 ++
 include/net/cfg80211.h                     |  9 ++--
 include/uapi/linux/nl80211.h               | 10 ++++-
 net/wireless/nl80211.c                     | 70 ++++++++++++++++++++++++++----
 5 files changed, 92 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index fd4c89df67e1..eba32f56850a 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -3256,6 +3256,15 @@ static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy,
 	struct ath6kl_vif *vif = netdev_priv(dev);
 	u16 interval;
 	int ret, rssi_thold;
+	int n_match_sets = request->n_match_sets;
+
+	/*
+	 * If there's a matchset w/o an SSID, then assume it's just for
+	 * the RSSI (nothing else is currently supported) and ignore it.
+	 * The device only supports a global RSSI filter that we set below.
+	 */
+	if (n_match_sets == 1 && !request->match_sets[0].ssid.ssid_len)
+		n_match_sets = 0;
 
 	if (ar->state != ATH6KL_STATE_ON)
 		return -EIO;
@@ -3268,11 +3277,11 @@ static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy,
 	ret = ath6kl_set_probed_ssids(ar, vif, request->ssids,
 				      request->n_ssids,
 				      request->match_sets,
-				      request->n_match_sets);
+				      n_match_sets);
 	if (ret < 0)
 		return ret;
 
-	if (!request->n_match_sets) {
+	if (!n_match_sets) {
 		ret = ath6kl_wmi_bssfilter_cmd(ar->wmi, vif->fw_vif_idx,
 					       ALL_BSS_FILTER, 0);
 		if (ret < 0)
@@ -3286,12 +3295,12 @@ static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy,
 
 	if (test_bit(ATH6KL_FW_CAPABILITY_RSSI_SCAN_THOLD,
 		     ar->fw_capabilities)) {
-		if (request->rssi_thold <= NL80211_SCAN_RSSI_THOLD_OFF)
+		if (request->min_rssi_thold <= NL80211_SCAN_RSSI_THOLD_OFF)
 			rssi_thold = 0;
-		else if (request->rssi_thold < -127)
+		else if (request->min_rssi_thold < -127)
 			rssi_thold = -127;
 		else
-			rssi_thold = request->rssi_thold;
+			rssi_thold = request->min_rssi_thold;
 
 		ret = ath6kl_wmi_set_rssi_filter_cmd(ar->wmi, vif->fw_vif_idx,
 						     rssi_thold);
diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c
index 0e0007960612..9674bfd978f1 100644
--- a/drivers/net/wireless/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/iwlwifi/mvm/scan.c
@@ -595,6 +595,9 @@ static void iwl_scan_offload_build_ssid(struct cfg80211_sched_scan_request *req,
 	 * config match list.
 	 */
 	for (i = 0; i < req->n_match_sets && i < PROBE_OPTION_MAX; i++) {
+		/* skip empty SSID matchsets */
+		if (!req->match_sets[i].ssid.ssid_len)
+			continue;
 		scan->direct_scan[i].id = WLAN_EID_SSID;
 		scan->direct_scan[i].len = req->match_sets[i].ssid.ssid_len;
 		memcpy(scan->direct_scan[i].ssid, req->match_sets[i].ssid.ssid,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d5e57bf678a6..009290e36d15 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1394,10 +1394,12 @@ struct cfg80211_scan_request {
 /**
  * struct cfg80211_match_set - sets of attributes to match
  *
- * @ssid: SSID to be matched
+ * @ssid: SSID to be matched; may be zero-length for no match (RSSI only)
+ * @rssi_thold: don't report scan results below this threshold (in s32 dBm)
  */
 struct cfg80211_match_set {
 	struct cfg80211_ssid ssid;
+	s32 rssi_thold;
 };
 
 /**
@@ -1420,7 +1422,8 @@ struct cfg80211_match_set {
  * @dev: the interface
  * @scan_start: start time of the scheduled scan
  * @channels: channels to scan
- * @rssi_thold: don't report scan results below this threshold (in s32 dBm)
+ * @min_rssi_thold: for drivers only supporting a single threshold, this
+ *	contains the minimum over all matchsets
  */
 struct cfg80211_sched_scan_request {
 	struct cfg80211_ssid *ssids;
@@ -1433,7 +1436,7 @@ struct cfg80211_sched_scan_request {
 	u32 flags;
 	struct cfg80211_match_set *match_sets;
 	int n_match_sets;
-	s32 rssi_thold;
+	s32 min_rssi_thold;
 
 	/* internal */
 	struct wiphy *wiphy;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 53e56cf7c0fe..474ce32e0797 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2467,9 +2467,15 @@ enum nl80211_reg_rule_attr {
  * enum nl80211_sched_scan_match_attr - scheduled scan match attributes
  * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved
  * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching,
- * only report BSS with matching SSID.
+ *	only report BSS with matching SSID.
  * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a
- *	BSS in scan results. Filtering is turned off if not specified.
+ *	BSS in scan results. Filtering is turned off if not specified. Note that
+ *	if this attribute is in a match set of its own, then it is treated as
+ *	the default value for all matchsets with an SSID, rather than being a
+ *	matchset of its own without an RSSI filter. This is due to problems with
+ *	how this API was implemented in the past. Also, due to the same problem,
+ *	the only way to create a matchset with only an RSSI filter (with this
+ *	attribute) is if there's only a single matchset with the RSSI attribute.
  * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter
  *	attribute number currently defined
  * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 043bfbd58b56..20be186f7f77 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5467,6 +5467,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
 	enum ieee80211_band band;
 	size_t ie_len;
 	struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
+	s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
 
 	if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) ||
 	    !rdev->ops->sched_scan_start)
@@ -5501,11 +5502,40 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
 	if (n_ssids > wiphy->max_sched_scan_ssids)
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH])
+	/*
+	 * First, count the number of 'real' matchsets. Due to an issue with
+	 * the old implementation, matchsets containing only the RSSI attribute
+	 * (NL80211_SCHED_SCAN_MATCH_ATTR_RSSI) are considered as the 'default'
+	 * RSSI for all matchsets, rather than their own matchset for reporting
+	 * all APs with a strong RSSI. This is needed to be compatible with
+	 * older userspace that treated a matchset with only the RSSI as the
+	 * global RSSI for all other matchsets - if there are other matchsets.
+	 */
+	if (info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) {
 		nla_for_each_nested(attr,
 				    info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH],
-				    tmp)
-			n_match_sets++;
+				    tmp) {
+			struct nlattr *rssi;
+
+			err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+					nla_data(attr), nla_len(attr),
+					nl80211_match_policy);
+			if (err)
+				return err;
+			/* add other standalone attributes here */
+			if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]) {
+				n_match_sets++;
+				continue;
+			}
+			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
+			if (rssi)
+				default_match_rssi = nla_get_s32(rssi);
+		}
+	}
+
+	/* However, if there's no other matchset, add the RSSI one */
+	if (!n_match_sets && default_match_rssi != NL80211_SCAN_RSSI_THOLD_OFF)
+		n_match_sets = 1;
 
 	if (n_match_sets > wiphy->max_match_sets)
 		return -EINVAL;
@@ -5633,6 +5663,15 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
 				goto out_free;
 			ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
 			if (ssid) {
+				if (WARN_ON(i >= n_match_sets)) {
+					/* this indicates a programming error,
+					 * the loop above should have verified
+					 * things properly
+					 */
+					err = -EINVAL;
+					goto out_free;
+				}
+
 				if (nla_len(ssid) > IEEE80211_MAX_SSID_LEN) {
 					err = -EINVAL;
 					goto out_free;
@@ -5641,15 +5680,28 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
 				       nla_data(ssid), nla_len(ssid));
 				request->match_sets[i].ssid.ssid_len =
 					nla_len(ssid);
+				/* special attribute - old implemenation w/a */
+				request->match_sets[i].rssi_thold =
+					default_match_rssi;
+				rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
+				if (rssi)
+					request->match_sets[i].rssi_thold =
+						nla_get_s32(rssi);
 			}
-			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
-			if (rssi)
-				request->rssi_thold = nla_get_u32(rssi);
-			else
-				request->rssi_thold =
-						   NL80211_SCAN_RSSI_THOLD_OFF;
 			i++;
 		}
+
+		/* there was no other matchset, so the RSSI one is alone */
+		if (i == 0)
+			request->match_sets[0].rssi_thold = default_match_rssi;
+
+		request->min_rssi_thold = INT_MAX;
+		for (i = 0; i < n_match_sets; i++)
+			request->min_rssi_thold =
+				min(request->match_sets[i].rssi_thold,
+				    request->min_rssi_thold);
+	} else {
+		request->min_rssi_thold = NL80211_SCAN_RSSI_THOLD_OFF;
 	}
 
 	if (info->attrs[NL80211_ATTR_IE]) {
-- 
cgit v1.2.3


From 9752482083066af7ac18a5ca376ff35d72418b29 Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Date: Thu, 30 Jan 2014 09:52:20 +0100
Subject: cfg80211: regulatory introduce maximum bandwidth calculation

If we get a regulatory request with a rule where
max_bandwidth_khz is set to 0, handle this case
as a special one.

If max_bandwidth_khz == 0 we should calculate the maximum
available bandwidth based on all frequency-contiguous rules.
To request auto calculation we just have to set:

country PL: DFS-ETSI
        (2402 - 2482 @ 40), (N/A, 20)
        (5170 - 5250 @ AUTO), (N/A, 20)
        (5250 - 5330 @ AUTO), (N/A, 20), DFS
        (5490 - 5710 @ 80), (N/A, 27), DFS

This means we will calculate the maximum bw for rules where
AUTO (N/A) was set: 160MHz (5330 - 5170) in the example above.
So we will get:
        (5170 - 5250 @ 160), (N/A, 20)
        (5250 - 5330 @ 160), (N/A, 20), DFS

In another case:
country FR: DFS-ETSI
        (2402 - 2482 @ 40), (N/A, 20)
        (5170 - 5250 @ AUTO), (N/A, 20)
        (5250 - 5330 @ 80), (N/A, 20), DFS
        (5490 - 5710 @ 80), (N/A, 27), DFS

We will get 80MHz (5250 - 5170):
        (5170 - 5250 @ 80), (N/A, 20)
        (5250 - 5330 @ 80), (N/A, 20), DFS

Based on these calculations we will set the correct channel
bandwidth flags (e.g. IEEE80211_CHAN_NO_80MHZ).

We don't need any changes in CRDA or internal regulatory.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
[extend nl80211 description a bit, fix typo]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |   5 +-
 net/wireless/nl80211.c       |  15 +++--
 net/wireless/reg.c           | 130 +++++++++++++++++++++++++++++++++++++------
 net/wireless/reg.h           |   2 +
 4 files changed, 130 insertions(+), 22 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 474ce32e0797..a12e6cae5132 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2437,7 +2437,10 @@ enum nl80211_reg_type {
  * 	in KHz. This is not a center a frequency but an actual regulatory
  * 	band edge.
  * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this
- * 	frequency range, in KHz.
+ *	frequency range, in KHz. If not present or 0, maximum available
+ *	bandwidth should be calculated base on contiguous rules and wider
+ *	channels will be allowed to cross multiple contiguous/overlapping
+ *	frequency ranges.
  * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain
  * 	for a given frequency range. The value is in mBi (100 * dBi).
  * 	If you don't have one then don't send this.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index be091ddd43a4..ebea1a197afb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4626,8 +4626,6 @@ static int parse_reg_rule(struct nlattr *tb[],
 		return -EINVAL;
 	if (!tb[NL80211_ATTR_FREQ_RANGE_END])
 		return -EINVAL;
-	if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
-		return -EINVAL;
 	if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
 		return -EINVAL;
 
@@ -4637,8 +4635,9 @@ static int parse_reg_rule(struct nlattr *tb[],
 		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
 	freq_range->end_freq_khz =
 		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
-	freq_range->max_bandwidth_khz =
-		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
+	if (tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
+		freq_range->max_bandwidth_khz =
+			nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
 
 	power_rule->max_eirp =
 		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
@@ -5108,6 +5107,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 		const struct ieee80211_reg_rule *reg_rule;
 		const struct ieee80211_freq_range *freq_range;
 		const struct ieee80211_power_rule *power_rule;
+		unsigned int max_bandwidth_khz;
 
 		reg_rule = &regdom->reg_rules[i];
 		freq_range = &reg_rule->freq_range;
@@ -5117,6 +5117,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 		if (!nl_reg_rule)
 			goto nla_put_failure_rcu;
 
+		max_bandwidth_khz = freq_range->max_bandwidth_khz;
+		if (!max_bandwidth_khz)
+			max_bandwidth_khz = reg_get_max_bandwidth(regdom,
+								  reg_rule);
+
 		if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS,
 				reg_rule->flags) ||
 		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_START,
@@ -5124,7 +5129,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_END,
 				freq_range->end_freq_khz) ||
 		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
-				freq_range->max_bandwidth_khz) ||
+				max_bandwidth_khz) ||
 		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
 				power_rule->max_antenna_gain) ||
 		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 27807bf0cdfc..27c5253e7a61 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -538,6 +538,61 @@ static const struct ieee80211_regdomain *reg_get_regdomain(struct wiphy *wiphy)
 	return get_cfg80211_regdom();
 }
 
+unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
+				   const struct ieee80211_reg_rule *rule)
+{
+	const struct ieee80211_freq_range *freq_range = &rule->freq_range;
+	const struct ieee80211_freq_range *freq_range_tmp;
+	const struct ieee80211_reg_rule *tmp;
+	u32 start_freq, end_freq, idx, no;
+
+	for (idx = 0; idx < rd->n_reg_rules; idx++)
+		if (rule == &rd->reg_rules[idx])
+			break;
+
+	if (idx == rd->n_reg_rules)
+		return 0;
+
+	/* get start_freq */
+	no = idx;
+
+	while (no) {
+		tmp = &rd->reg_rules[--no];
+		freq_range_tmp = &tmp->freq_range;
+
+		if (freq_range_tmp->end_freq_khz < freq_range->start_freq_khz)
+			break;
+
+		if (freq_range_tmp->max_bandwidth_khz)
+			break;
+
+		freq_range = freq_range_tmp;
+	}
+
+	start_freq = freq_range->start_freq_khz;
+
+	/* get end_freq */
+	freq_range = &rule->freq_range;
+	no = idx;
+
+	while (no < rd->n_reg_rules - 1) {
+		tmp = &rd->reg_rules[++no];
+		freq_range_tmp = &tmp->freq_range;
+
+		if (freq_range_tmp->start_freq_khz > freq_range->end_freq_khz)
+			break;
+
+		if (freq_range_tmp->max_bandwidth_khz)
+			break;
+
+		freq_range = freq_range_tmp;
+	}
+
+	end_freq = freq_range->end_freq_khz;
+
+	return end_freq - start_freq;
+}
+
 /* Sanity check on a regulatory rule */
 static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule)
 {
@@ -646,7 +701,9 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1,
  * Helper for regdom_intersect(), this does the real
  * mathematical intersection fun
  */
-static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
+static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
+			       const struct ieee80211_regdomain *rd2,
+			       const struct ieee80211_reg_rule *rule1,
 			       const struct ieee80211_reg_rule *rule2,
 			       struct ieee80211_reg_rule *intersected_rule)
 {
@@ -654,7 +711,7 @@ static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
 	struct ieee80211_freq_range *freq_range;
 	const struct ieee80211_power_rule *power_rule1, *power_rule2;
 	struct ieee80211_power_rule *power_rule;
-	u32 freq_diff;
+	u32 freq_diff, max_bandwidth1, max_bandwidth2;
 
 	freq_range1 = &rule1->freq_range;
 	freq_range2 = &rule2->freq_range;
@@ -668,8 +725,24 @@ static int reg_rules_intersect(const struct ieee80211_reg_rule *rule1,
 					 freq_range2->start_freq_khz);
 	freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
 				       freq_range2->end_freq_khz);
-	freq_range->max_bandwidth_khz = min(freq_range1->max_bandwidth_khz,
-					    freq_range2->max_bandwidth_khz);
+
+	max_bandwidth1 = freq_range1->max_bandwidth_khz;
+	max_bandwidth2 = freq_range2->max_bandwidth_khz;
+
+	/*
+	 * In case max_bandwidth1 == 0 and max_bandwith2 == 0 set
+	 * output bandwidth as 0 (auto calculation). Next we will
+	 * calculate this correctly in handle_channel function.
+	 * In other case calculate output bandwidth here.
+	 */
+	if (max_bandwidth1 || max_bandwidth2) {
+		if (!max_bandwidth1)
+			max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1);
+		if (!max_bandwidth2)
+			max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2);
+	}
+
+	freq_range->max_bandwidth_khz = min(max_bandwidth1, max_bandwidth2);
 
 	freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz;
 	if (freq_range->max_bandwidth_khz > freq_diff)
@@ -729,7 +802,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
 		rule1 = &rd1->reg_rules[x];
 		for (y = 0; y < rd2->n_reg_rules; y++) {
 			rule2 = &rd2->reg_rules[y];
-			if (!reg_rules_intersect(rule1, rule2, &dummy_rule))
+			if (!reg_rules_intersect(rd1, rd2, rule1, rule2,
+						 &dummy_rule))
 				num_rules++;
 		}
 	}
@@ -754,7 +828,8 @@ regdom_intersect(const struct ieee80211_regdomain *rd1,
 			 * a memcpy()
 			 */
 			intersected_rule = &rd->reg_rules[rule_idx];
-			r = reg_rules_intersect(rule1, rule2, intersected_rule);
+			r = reg_rules_intersect(rd1, rd2, rule1, rule2,
+						intersected_rule);
 			/*
 			 * No need to memset here the intersected rule here as
 			 * we're not using the stack anymore
@@ -909,6 +984,8 @@ static void handle_channel(struct wiphy *wiphy,
 	const struct ieee80211_freq_range *freq_range = NULL;
 	struct wiphy *request_wiphy = NULL;
 	struct regulatory_request *lr = get_last_request();
+	const struct ieee80211_regdomain *regd;
+	u32 max_bandwidth_khz;
 
 	request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx);
 
@@ -950,11 +1027,18 @@ static void handle_channel(struct wiphy *wiphy,
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+	max_bandwidth_khz = freq_range->max_bandwidth_khz;
+	/* Check if auto calculation requested */
+	if (!max_bandwidth_khz) {
+		regd = reg_get_regdomain(wiphy);
+		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+	}
+
+	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
 		bw_flags = IEEE80211_CHAN_NO_HT40;
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80))
+	if (max_bandwidth_khz < MHZ_TO_KHZ(80))
 		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160))
+	if (max_bandwidth_khz < MHZ_TO_KHZ(160))
 		bw_flags |= IEEE80211_CHAN_NO_160MHZ;
 
 	if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
@@ -1340,6 +1424,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
 	const struct ieee80211_freq_range *freq_range = NULL;
+	u32 max_bandwidth_khz;
 
 	reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
 				      regd);
@@ -1357,11 +1442,16 @@ static void handle_channel_custom(struct wiphy *wiphy,
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(40))
+	max_bandwidth_khz = freq_range->max_bandwidth_khz;
+	/* Check if auto calculation requested */
+	if (!max_bandwidth_khz)
+		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+
+	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
 		bw_flags = IEEE80211_CHAN_NO_HT40;
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(80))
+	if (max_bandwidth_khz < MHZ_TO_KHZ(80))
 		bw_flags |= IEEE80211_CHAN_NO_80MHZ;
-	if (freq_range->max_bandwidth_khz < MHZ_TO_KHZ(160))
+	if (max_bandwidth_khz < MHZ_TO_KHZ(160))
 		bw_flags |= IEEE80211_CHAN_NO_160MHZ;
 
 	chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags;
@@ -2155,6 +2245,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_freq_range *freq_range = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
+	char bw[32];
 
 	pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n");
 
@@ -2163,22 +2254,29 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 		freq_range = &reg_rule->freq_range;
 		power_rule = &reg_rule->power_rule;
 
+		if (!freq_range->max_bandwidth_khz)
+			snprintf(bw, 32, "%d KHz, AUTO",
+				 reg_get_max_bandwidth(rd, reg_rule));
+		else
+			snprintf(bw, 32, "%d KHz",
+				 freq_range->max_bandwidth_khz);
+
 		/*
 		 * There may not be documentation for max antenna gain
 		 * in certain regions
 		 */
 		if (power_rule->max_antenna_gain)
-			pr_info("  (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n",
+			pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
-				freq_range->max_bandwidth_khz,
+				bw,
 				power_rule->max_antenna_gain,
 				power_rule->max_eirp);
 		else
-			pr_info("  (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n",
+			pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
-				freq_range->max_bandwidth_khz,
+				bw,
 				power_rule->max_eirp);
 	}
 }
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 02bd8f4b0921..18524617ab62 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -34,6 +34,8 @@ int __init regulatory_init(void);
 void regulatory_exit(void);
 
 int set_regdom(const struct ieee80211_regdomain *rd);
+unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
+				   const struct ieee80211_reg_rule *rule);
 
 bool reg_last_request_cell_base(void);
 
-- 
cgit v1.2.3


From 44aaada9d144a46d3de48ad81093f69d17fae96f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 7 Feb 2014 11:23:22 +0000
Subject: GFS2: Add meta readahead field in directory entries

The intent of this new field in the directory entry is to
allow a subsequent lookup to know how many blocks, which
are contiguous with the inode, contain metadata which relates
to the inode. This will then allow the issuing of a single
read to read these blocks, rather than reading the inode
first, and then issuing a second read for the metadata.

This only works under some fairly strict conditions: since
we do not have back pointers from inodes to directory entries,
we must ensure that the blocks referenced in this way will
always belong to the inode.

This rules out being able to use this system for indirect
blocks, as these can change as a result of truncate/rewrite.

So the idea here is to restrict this to xattr blocks only
for the time being. For most inodes, that means only a
single block. Also, when using ACLs and/or SELinux or
other LSMs, these will be added at inode creation time
so that they will be contiguous with the inode on disk and
also will almost always be needed when we read the inode in
for permissions checks.

Once an xattr block for an inode is allocated, it will never
change until the inode is deallocated.

This patch adds the new field, a further patch will add the
readahead in due course.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c                    | 9 +++++++++
 include/uapi/linux/gfs2_ondisk.h | 8 +++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index fa32655449c8..ffcfdd18d485 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1684,6 +1684,14 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 	return 0;
 }
 
+static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip)
+{
+	u64 where = ip->i_no_addr + 1;
+	if (ip->i_eattr == where)
+		return 1;
+	return 0;
+}
+
 /**
  * gfs2_dir_add - Add new filename into directory
  * @inode: The directory inode
@@ -1721,6 +1729,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			dent = gfs2_init_dirent(inode, dent, name, bh);
 			gfs2_inum_out(nip, dent);
 			dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
+			dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
 			tv = CURRENT_TIME;
 			if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 				leaf = (struct gfs2_leaf *)bh->b_data;
diff --git a/include/uapi/linux/gfs2_ondisk.h b/include/uapi/linux/gfs2_ondisk.h
index 310020816809..db3fdd083882 100644
--- a/include/uapi/linux/gfs2_ondisk.h
+++ b/include/uapi/linux/gfs2_ondisk.h
@@ -304,7 +304,13 @@ struct gfs2_dirent {
 	__be16 de_rec_len;
 	__be16 de_name_len;
 	__be16 de_type;
-	__u8 __pad[14];
+	union {
+		__u8 __pad[14];
+		struct {
+			__be16 de_rahead;
+			__u8 pad2[12];
+		};
+	};
 };
 
 /*
-- 
cgit v1.2.3


From 5267cf02c7794953d89e9593a0d497bf43e3790d Mon Sep 17 00:00:00 2001
From: Bjarke Istrup Pedersen <gurligebis@gentoo.org>
Date: Wed, 22 Jan 2014 09:16:58 +0000
Subject: hv: Add hyperv.h to uapi headers

This patch adds the hyperv.h header to the uapi folder, and adds it to the Kbuild file.
Doing this enables compiling userspace Hyper-V tools using the installed headers.

Version 2: Split UAPI parts into new header, instead of duplicating.

Signed-off-by: Bjarke Istrup Pedersen <gurligebis@gentoo.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h      | 321 +----------------------------------------
 include/uapi/linux/Kbuild   |   1 +
 include/uapi/linux/hyperv.h | 344 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 347 insertions(+), 319 deletions(-)
 create mode 100644 include/uapi/linux/hyperv.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 15da677478dd..167ef47e3d6e 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -25,325 +25,9 @@
 #ifndef _HYPERV_H
 #define _HYPERV_H
 
-#include <linux/types.h>
-
-/*
- * Framework version for util services.
- */
-#define UTIL_FW_MINOR  0
-
-#define UTIL_WS2K8_FW_MAJOR  1
-#define UTIL_WS2K8_FW_VERSION     (UTIL_WS2K8_FW_MAJOR << 16 | UTIL_FW_MINOR)
-
-#define UTIL_FW_MAJOR  3
-#define UTIL_FW_VERSION     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
-
-
-/*
- * Implementation of host controlled snapshot of the guest.
- */
-
-#define VSS_OP_REGISTER 128
-
-enum hv_vss_op {
-	VSS_OP_CREATE = 0,
-	VSS_OP_DELETE,
-	VSS_OP_HOT_BACKUP,
-	VSS_OP_GET_DM_INFO,
-	VSS_OP_BU_COMPLETE,
-	/*
-	 * Following operations are only supported with IC version >= 5.0
-	 */
-	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
-	VSS_OP_THAW, /* Unfreeze the file systems */
-	VSS_OP_AUTO_RECOVER,
-	VSS_OP_COUNT /* Number of operations, must be last */
-};
-
-
-/*
- * Header for all VSS messages.
- */
-struct hv_vss_hdr {
-	__u8 operation;
-	__u8 reserved[7];
-} __attribute__((packed));
-
-
-/*
- * Flag values for the hv_vss_check_feature. Linux supports only
- * one value.
- */
-#define VSS_HBU_NO_AUTO_RECOVERY	0x00000005
-
-struct hv_vss_check_feature {
-	__u32 flags;
-} __attribute__((packed));
-
-struct hv_vss_check_dm_info {
-	__u32 flags;
-} __attribute__((packed));
-
-struct hv_vss_msg {
-	union {
-		struct hv_vss_hdr vss_hdr;
-		int error;
-	};
-	union {
-		struct hv_vss_check_feature vss_cf;
-		struct hv_vss_check_dm_info dm_info;
-	};
-} __attribute__((packed));
-
-/*
- * An implementation of HyperV key value pair (KVP) functionality for Linux.
- *
- *
- * Copyright (C) 2010, Novell, Inc.
- * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
- *
- */
-
-/*
- * Maximum value size - used for both key names and value data, and includes
- * any applicable NULL terminators.
- *
- * Note:  This limit is somewhat arbitrary, but falls easily within what is
- * supported for all native guests (back to Win 2000) and what is reasonable
- * for the IC KVP exchange functionality.  Note that Windows Me/98/95 are
- * limited to 255 character key names.
- *
- * MSDN recommends not storing data values larger than 2048 bytes in the
- * registry.
- *
- * Note:  This value is used in defining the KVP exchange message - this value
- * cannot be modified without affecting the message size and compatibility.
- */
-
-/*
- * bytes, including any null terminators
- */
-#define HV_KVP_EXCHANGE_MAX_VALUE_SIZE          (2048)
-
-
-/*
- * Maximum key size - the registry limit for the length of an entry name
- * is 256 characters, including the null terminator
- */
-
-#define HV_KVP_EXCHANGE_MAX_KEY_SIZE            (512)
+#include <uapi/linux/hyperv.h>
 
-/*
- * In Linux, we implement the KVP functionality in two components:
- * 1) The kernel component which is packaged as part of the hv_utils driver
- * is responsible for communicating with the host and responsible for
- * implementing the host/guest protocol. 2) A user level daemon that is
- * responsible for data gathering.
- *
- * Host/Guest Protocol: The host iterates over an index and expects the guest
- * to assign a key name to the index and also return the value corresponding to
- * the key. The host will have atmost one KVP transaction outstanding at any
- * given point in time. The host side iteration stops when the guest returns
- * an error. Microsoft has specified the following mapping of key names to
- * host specified index:
- *
- *	Index		Key Name
- *	0		FullyQualifiedDomainName
- *	1		IntegrationServicesVersion
- *	2		NetworkAddressIPv4
- *	3		NetworkAddressIPv6
- *	4		OSBuildNumber
- *	5		OSName
- *	6		OSMajorVersion
- *	7		OSMinorVersion
- *	8		OSVersion
- *	9		ProcessorArchitecture
- *
- * The Windows host expects the Key Name and Key Value to be encoded in utf16.
- *
- * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the
- * data gathering functionality in a user mode daemon. The user level daemon
- * is also responsible for binding the key name to the index as well. The
- * kernel and user-level daemon communicate using a connector channel.
- *
- * The user mode component first registers with the
- * the kernel component. Subsequently, the kernel component requests, data
- * for the specified keys. In response to this message the user mode component
- * fills in the value corresponding to the specified key. We overload the
- * sequence field in the cn_msg header to define our KVP message types.
- *
- *
- * The kernel component simply acts as a conduit for communication between the
- * Windows host and the user-level daemon. The kernel component passes up the
- * index received from the Host to the user-level daemon. If the index is
- * valid (supported), the corresponding key as well as its
- * value (both are strings) is returned. If the index is invalid
- * (not supported), a NULL key string is returned.
- */
-
-
-/*
- * Registry value types.
- */
-
-#define REG_SZ 1
-#define REG_U32 4
-#define REG_U64 8
-
-/*
- * As we look at expanding the KVP functionality to include
- * IP injection functionality, we need to maintain binary
- * compatibility with older daemons.
- *
- * The KVP opcodes are defined by the host and it was unfortunate
- * that I chose to treat the registration operation as part of the
- * KVP operations defined by the host.
- * Here is the level of compatibility
- * (between the user level daemon and the kernel KVP driver) that we
- * will implement:
- *
- * An older daemon will always be supported on a newer driver.
- * A given user level daemon will require a minimal version of the
- * kernel driver.
- * If we cannot handle the version differences, we will fail gracefully
- * (this can happen when we have a user level daemon that is more
- * advanced than the KVP driver.
- *
- * We will use values used in this handshake for determining if we have
- * workable user level daemon and the kernel driver. We begin by taking the
- * registration opcode out of the KVP opcode namespace. We will however,
- * maintain compatibility with the existing user-level daemon code.
- */
-
-/*
- * Daemon code not supporting IP injection (legacy daemon).
- */
-
-#define KVP_OP_REGISTER	4
-
-/*
- * Daemon code supporting IP injection.
- * The KVP opcode field is used to communicate the
- * registration information; so define a namespace that
- * will be distinct from the host defined KVP opcode.
- */
-
-#define KVP_OP_REGISTER1 100
-
-enum hv_kvp_exchg_op {
-	KVP_OP_GET = 0,
-	KVP_OP_SET,
-	KVP_OP_DELETE,
-	KVP_OP_ENUMERATE,
-	KVP_OP_GET_IP_INFO,
-	KVP_OP_SET_IP_INFO,
-	KVP_OP_COUNT /* Number of operations, must be last. */
-};
-
-enum hv_kvp_exchg_pool {
-	KVP_POOL_EXTERNAL = 0,
-	KVP_POOL_GUEST,
-	KVP_POOL_AUTO,
-	KVP_POOL_AUTO_EXTERNAL,
-	KVP_POOL_AUTO_INTERNAL,
-	KVP_POOL_COUNT /* Number of pools, must be last. */
-};
-
-/*
- * Some Hyper-V status codes.
- */
-
-#define HV_S_OK				0x00000000
-#define HV_E_FAIL			0x80004005
-#define HV_S_CONT			0x80070103
-#define HV_ERROR_NOT_SUPPORTED		0x80070032
-#define HV_ERROR_MACHINE_LOCKED		0x800704F7
-#define HV_ERROR_DEVICE_NOT_CONNECTED	0x8007048F
-#define HV_INVALIDARG			0x80070057
-#define HV_GUID_NOTFOUND		0x80041002
-
-#define ADDR_FAMILY_NONE	0x00
-#define ADDR_FAMILY_IPV4	0x01
-#define ADDR_FAMILY_IPV6	0x02
-
-#define MAX_ADAPTER_ID_SIZE	128
-#define MAX_IP_ADDR_SIZE	1024
-#define MAX_GATEWAY_SIZE	512
-
-
-struct hv_kvp_ipaddr_value {
-	__u16	adapter_id[MAX_ADAPTER_ID_SIZE];
-	__u8	addr_family;
-	__u8	dhcp_enabled;
-	__u16	ip_addr[MAX_IP_ADDR_SIZE];
-	__u16	sub_net[MAX_IP_ADDR_SIZE];
-	__u16	gate_way[MAX_GATEWAY_SIZE];
-	__u16	dns_addr[MAX_IP_ADDR_SIZE];
-} __attribute__((packed));
-
-
-struct hv_kvp_hdr {
-	__u8 operation;
-	__u8 pool;
-	__u16 pad;
-} __attribute__((packed));
-
-struct hv_kvp_exchg_msg_value {
-	__u32 value_type;
-	__u32 key_size;
-	__u32 value_size;
-	__u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
-	union {
-		__u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
-		__u32 value_u32;
-		__u64 value_u64;
-	};
-} __attribute__((packed));
-
-struct hv_kvp_msg_enumerate {
-	__u32 index;
-	struct hv_kvp_exchg_msg_value data;
-} __attribute__((packed));
-
-struct hv_kvp_msg_get {
-	struct hv_kvp_exchg_msg_value data;
-};
-
-struct hv_kvp_msg_set {
-	struct hv_kvp_exchg_msg_value data;
-};
-
-struct hv_kvp_msg_delete {
-	__u32 key_size;
-	__u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
-};
-
-struct hv_kvp_register {
-	__u8 version[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
-};
-
-struct hv_kvp_msg {
-	union {
-		struct hv_kvp_hdr	kvp_hdr;
-		int error;
-	};
-	union {
-		struct hv_kvp_msg_get		kvp_get;
-		struct hv_kvp_msg_set		kvp_set;
-		struct hv_kvp_msg_delete	kvp_delete;
-		struct hv_kvp_msg_enumerate	kvp_enum_data;
-		struct hv_kvp_ipaddr_value      kvp_ip_val;
-		struct hv_kvp_register		kvp_register;
-	} body;
-} __attribute__((packed));
-
-struct hv_kvp_ip_msg {
-	__u8 operation;
-	__u8 pool;
-	struct hv_kvp_ipaddr_value      kvp_ip_val;
-} __attribute__((packed));
-
-#ifdef __KERNEL__
+#include <linux/types.h>
 #include <linux/scatterlist.h>
 #include <linux/list.h>
 #include <linux/uuid.h>
@@ -1465,5 +1149,4 @@ void hv_vss_onchannelcallback(void *);
 
 extern __u32 vmbus_proto_version;
 
-#endif /* __KERNEL__ */
 #endif /* _HYPERV_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 3ce25b5d75a9..6929571b79b0 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -139,6 +139,7 @@ header-y += hid.h
 header-y += hiddev.h
 header-y += hidraw.h
 header-y += hpet.h
+header-y += hyperv.h
 header-y += hysdn_if.h
 header-y += i2c-dev.h
 header-y += i2c.h
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
new file mode 100644
index 000000000000..1861f8e2ce2b
--- /dev/null
+++ b/include/uapi/linux/hyperv.h
@@ -0,0 +1,344 @@
+/*
+ *
+ * Copyright (c) 2011, Microsoft Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Authors:
+ *   Haiyang Zhang <haiyangz@microsoft.com>
+ *   Hank Janssen  <hjanssen@microsoft.com>
+ *   K. Y. Srinivasan <kys@microsoft.com>
+ *
+ */
+
+#ifndef _UAPI_HYPERV_H
+#define _UAPI_HYPERV_H
+
+/*
+ * Framework version for util services.
+ */
+#define UTIL_FW_MINOR  0
+
+#define UTIL_WS2K8_FW_MAJOR  1
+#define UTIL_WS2K8_FW_VERSION     (UTIL_WS2K8_FW_MAJOR << 16 | UTIL_FW_MINOR)
+
+#define UTIL_FW_MAJOR  3
+#define UTIL_FW_VERSION     (UTIL_FW_MAJOR << 16 | UTIL_FW_MINOR)
+
+
+/*
+ * Implementation of host controlled snapshot of the guest.
+ */
+
+#define VSS_OP_REGISTER 128
+
+enum hv_vss_op {
+	VSS_OP_CREATE = 0,
+	VSS_OP_DELETE,
+	VSS_OP_HOT_BACKUP,
+	VSS_OP_GET_DM_INFO,
+	VSS_OP_BU_COMPLETE,
+	/*
+	 * Following operations are only supported with IC version >= 5.0
+	 */
+	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
+	VSS_OP_THAW, /* Unfreeze the file systems */
+	VSS_OP_AUTO_RECOVER,
+	VSS_OP_COUNT /* Number of operations, must be last */
+};
+
+
+/*
+ * Header for all VSS messages.
+ */
+struct hv_vss_hdr {
+	__u8 operation;
+	__u8 reserved[7];
+} __attribute__((packed));
+
+
+/*
+ * Flag values for the hv_vss_check_feature. Linux supports only
+ * one value.
+ */
+#define VSS_HBU_NO_AUTO_RECOVERY	0x00000005
+
+struct hv_vss_check_feature {
+	__u32 flags;
+} __attribute__((packed));
+
+struct hv_vss_check_dm_info {
+	__u32 flags;
+} __attribute__((packed));
+
+struct hv_vss_msg {
+	union {
+		struct hv_vss_hdr vss_hdr;
+		int error;
+	};
+	union {
+		struct hv_vss_check_feature vss_cf;
+		struct hv_vss_check_dm_info dm_info;
+	};
+} __attribute__((packed));
+
+/*
+ * An implementation of HyperV key value pair (KVP) functionality for Linux.
+ *
+ *
+ * Copyright (C) 2010, Novell, Inc.
+ * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
+ *
+ */
+
+/*
+ * Maximum value size - used for both key names and value data, and includes
+ * any applicable NULL terminators.
+ *
+ * Note:  This limit is somewhat arbitrary, but falls easily within what is
+ * supported for all native guests (back to Win 2000) and what is reasonable
+ * for the IC KVP exchange functionality.  Note that Windows Me/98/95 are
+ * limited to 255 character key names.
+ *
+ * MSDN recommends not storing data values larger than 2048 bytes in the
+ * registry.
+ *
+ * Note:  This value is used in defining the KVP exchange message - this value
+ * cannot be modified without affecting the message size and compatibility.
+ */
+
+/*
+ * bytes, including any null terminators
+ */
+#define HV_KVP_EXCHANGE_MAX_VALUE_SIZE          (2048)
+
+
+/*
+ * Maximum key size - the registry limit for the length of an entry name
+ * is 256 characters, including the null terminator
+ */
+
+#define HV_KVP_EXCHANGE_MAX_KEY_SIZE            (512)
+
+/*
+ * In Linux, we implement the KVP functionality in two components:
+ * 1) The kernel component which is packaged as part of the hv_utils driver
+ * is responsible for communicating with the host and responsible for
+ * implementing the host/guest protocol. 2) A user level daemon that is
+ * responsible for data gathering.
+ *
+ * Host/Guest Protocol: The host iterates over an index and expects the guest
+ * to assign a key name to the index and also return the value corresponding to
+ * the key. The host will have at most one KVP transaction outstanding at any
+ * given point in time. The host side iteration stops when the guest returns
+ * an error. Microsoft has specified the following mapping of key names to
+ * host specified index:
+ *
+ *	Index		Key Name
+ *	0		FullyQualifiedDomainName
+ *	1		IntegrationServicesVersion
+ *	2		NetworkAddressIPv4
+ *	3		NetworkAddressIPv6
+ *	4		OSBuildNumber
+ *	5		OSName
+ *	6		OSMajorVersion
+ *	7		OSMinorVersion
+ *	8		OSVersion
+ *	9		ProcessorArchitecture
+ *
+ * The Windows host expects the Key Name and Key Value to be encoded in utf16.
+ *
+ * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the
+ * data gathering functionality in a user mode daemon. The user level daemon
+ * is also responsible for binding the key name to the index as well. The
+ * kernel and user-level daemon communicate using a connector channel.
+ *
+ * The user mode component first registers with
+ * the kernel component. Subsequently, the kernel component requests data
+ * for the specified keys. In response to this message the user mode component
+ * fills in the value corresponding to the specified key. We overload the
+ * sequence field in the cn_msg header to define our KVP message types.
+ *
+ *
+ * The kernel component simply acts as a conduit for communication between the
+ * Windows host and the user-level daemon. The kernel component passes up the
+ * index received from the Host to the user-level daemon. If the index is
+ * valid (supported), the corresponding key as well as its
+ * value (both are strings) is returned. If the index is invalid
+ * (not supported), a NULL key string is returned.
+ */
+
+
+/*
+ * Registry value types.
+ */
+
+#define REG_SZ 1
+#define REG_U32 4
+#define REG_U64 8
+
+/*
+ * As we look at expanding the KVP functionality to include
+ * IP injection functionality, we need to maintain binary
+ * compatibility with older daemons.
+ *
+ * The KVP opcodes are defined by the host and it was unfortunate
+ * that I chose to treat the registration operation as part of the
+ * KVP operations defined by the host.
+ * Here is the level of compatibility
+ * (between the user level daemon and the kernel KVP driver) that we
+ * will implement:
+ *
+ * An older daemon will always be supported on a newer driver.
+ * A given user level daemon will require a minimal version of the
+ * kernel driver.
+ * If we cannot handle the version differences, we will fail gracefully
+ * (this can happen when we have a user level daemon that is more
+ * advanced than the KVP driver).
+ *
+ * We will use values used in this handshake for determining if we have
+ * workable user level daemon and the kernel driver. We begin by taking the
+ * registration opcode out of the KVP opcode namespace. We will however,
+ * maintain compatibility with the existing user-level daemon code.
+ */
+
+/*
+ * Daemon code not supporting IP injection (legacy daemon).
+ */
+
+#define KVP_OP_REGISTER	4
+
+/*
+ * Daemon code supporting IP injection.
+ * The KVP opcode field is used to communicate the
+ * registration information; so define a namespace that
+ * will be distinct from the host defined KVP opcode.
+ */
+
+#define KVP_OP_REGISTER1 100
+
+enum hv_kvp_exchg_op {
+	KVP_OP_GET = 0,
+	KVP_OP_SET,
+	KVP_OP_DELETE,
+	KVP_OP_ENUMERATE,
+	KVP_OP_GET_IP_INFO,
+	KVP_OP_SET_IP_INFO,
+	KVP_OP_COUNT /* Number of operations, must be last. */
+};
+
+enum hv_kvp_exchg_pool {
+	KVP_POOL_EXTERNAL = 0,
+	KVP_POOL_GUEST,
+	KVP_POOL_AUTO,
+	KVP_POOL_AUTO_EXTERNAL,
+	KVP_POOL_AUTO_INTERNAL,
+	KVP_POOL_COUNT /* Number of pools, must be last. */
+};
+
+/*
+ * Some Hyper-V status codes.
+ */
+
+#define HV_S_OK				0x00000000
+#define HV_E_FAIL			0x80004005
+#define HV_S_CONT			0x80070103
+#define HV_ERROR_NOT_SUPPORTED		0x80070032
+#define HV_ERROR_MACHINE_LOCKED		0x800704F7
+#define HV_ERROR_DEVICE_NOT_CONNECTED	0x8007048F
+#define HV_INVALIDARG			0x80070057
+#define HV_GUID_NOTFOUND		0x80041002
+
+#define ADDR_FAMILY_NONE	0x00
+#define ADDR_FAMILY_IPV4	0x01
+#define ADDR_FAMILY_IPV6	0x02
+
+#define MAX_ADAPTER_ID_SIZE	128
+#define MAX_IP_ADDR_SIZE	1024
+#define MAX_GATEWAY_SIZE	512
+
+
+struct hv_kvp_ipaddr_value {
+	__u16	adapter_id[MAX_ADAPTER_ID_SIZE];
+	__u8	addr_family;
+	__u8	dhcp_enabled;
+	__u16	ip_addr[MAX_IP_ADDR_SIZE];
+	__u16	sub_net[MAX_IP_ADDR_SIZE];
+	__u16	gate_way[MAX_GATEWAY_SIZE];
+	__u16	dns_addr[MAX_IP_ADDR_SIZE];
+} __attribute__((packed));
+
+
+struct hv_kvp_hdr {
+	__u8 operation;
+	__u8 pool;
+	__u16 pad;
+} __attribute__((packed));
+
+struct hv_kvp_exchg_msg_value {
+	__u32 value_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+	union {
+		__u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+		__u32 value_u32;
+		__u64 value_u64;
+	};
+} __attribute__((packed));
+
+struct hv_kvp_msg_enumerate {
+	__u32 index;
+	struct hv_kvp_exchg_msg_value data;
+} __attribute__((packed));
+
+struct hv_kvp_msg_get {
+	struct hv_kvp_exchg_msg_value data;
+};
+
+struct hv_kvp_msg_set {
+	struct hv_kvp_exchg_msg_value data;
+};
+
+struct hv_kvp_msg_delete {
+	__u32 key_size;
+	__u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+};
+
+struct hv_kvp_register {
+	__u8 version[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+};
+
+struct hv_kvp_msg {
+	union {
+		struct hv_kvp_hdr	kvp_hdr;
+		int error;
+	};
+	union {
+		struct hv_kvp_msg_get		kvp_get;
+		struct hv_kvp_msg_set		kvp_set;
+		struct hv_kvp_msg_delete	kvp_delete;
+		struct hv_kvp_msg_enumerate	kvp_enum_data;
+		struct hv_kvp_ipaddr_value      kvp_ip_val;
+		struct hv_kvp_register		kvp_register;
+	} body;
+} __attribute__((packed));
+
+struct hv_kvp_ip_msg {
+	__u8 operation;
+	__u8 pool;
+	struct hv_kvp_ipaddr_value      kvp_ip_val;
+} __attribute__((packed));
+
+#endif /* _UAPI_HYPERV_H */
-- 
cgit v1.2.3


From e7aceef4ac3180bd93d4c0d3fe23775850b6c31d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 12 Feb 2014 14:21:15 +0100
Subject: cfg80211: remove NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL

There's no driver using this flag and consequently no userspace
application is actually looking at it. As it seems unlikely for
any driver to start using it, remove it and the (very little)
code that used it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 7 +------
 net/wireless/chan.c          | 6 +-----
 2 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index a12e6cae5132..ba1f7625625c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3843,11 +3843,6 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested
  *	to work properly to suppport receiving regulatory hints from
  *	cellular base stations.
- * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: If this is set, an active
- *	P2P Device (%NL80211_IFTYPE_P2P_DEVICE) requires its own channel
- *	in the interface combinations, even when it's only used for scan
- *	and remain-on-channel. This could be due to, for example, the
- *	remain-on-channel implementation requiring a channel context.
  * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of
  *	equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station
  *	mode
@@ -3889,7 +3884,7 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_HT_IBSS				= 1 << 1,
 	NL80211_FEATURE_INACTIVITY_TIMER		= 1 << 2,
 	NL80211_FEATURE_CELL_BASE_REG_HINTS		= 1 << 3,
-	NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL	= 1 << 4,
+	/* bit 4 is reserved - don't use */
 	NL80211_FEATURE_SAE				= 1 << 5,
 	NL80211_FEATURE_LOW_PRIORITY_SCAN		= 1 << 6,
 	NL80211_FEATURE_SCAN_FLUSH			= 1 << 7,
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index f8ab7df1ab0d..5946450c5406 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -705,12 +705,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_WDS:
-		/* these interface types don't really have a channel */
-		return;
 	case NL80211_IFTYPE_P2P_DEVICE:
-		if (wdev->wiphy->features &
-				NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL)
-			*chanmode = CHAN_MODE_EXCLUSIVE;
+		/* these interface types don't really have a channel */
 		return;
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
-- 
cgit v1.2.3


From bf8fc60a62db3fa99d2ded50e68bf3e5be027ebe Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:13:57 +0000
Subject: ethtool: Expand documentation of struct ethtool_cmd

struct ethtool_cmd has very limited documentation; it contains
several obscure or obsolete fields and several with non-obvious
interpretation.

Replace the inline comments (and some others below) with a full
explanation of the semantics as well as I understand them, in
kernel-doc format.  Formally deprecate some fields that seem to be of
historical use only.

Extend the comment about 32/64-bit compatibility to cover all
ethtool structures.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 124 +++++++++++++++++++++++++++++++------------
 1 file changed, 90 insertions(+), 34 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 38dbafaa5341..c4d5eb2bfac5 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -16,37 +16,97 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-/* This should work for both 32 and 64 bit userland. */
+/* All structures exposed to userland should be defined such that they
+ * have the same layout for 32-bit and 64-bit userland.
+ */
+
+/**
+ * struct ethtool_cmd - link control and status
+ * @cmd: Command number = %ETHTOOL_GSET or %ETHTOOL_SSET
+ * @supported: Bitmask of %SUPPORTED_* flags for the link modes,
+ *	physical connectors and other link features for which the
+ *	interface supports autonegotiation or auto-detection.
+ *	Read-only.
+ * @advertising: Bitmask of %ADVERTISED_* flags for the link modes,
+ *	physical connectors and other link features that are
+ *	advertised through autonegotiation or enabled for
+ *	auto-detection.
+ * @speed: Low bits of the speed
+ * @duplex: Duplex mode; one of %DUPLEX_*
+ * @port: Physical connector type; one of %PORT_*
+ * @phy_address: MDIO address of PHY (transceiver); 0 or 255 if not
+ *	applicable.  For clause 45 PHYs this is the PRTAD.
+ * @transceiver: Historically used to distinguish different possible
+ *	PHY types, but not in a consistent way.  Deprecated.
+ * @autoneg: Enable/disable autonegotiation and auto-detection;
+ *	either %AUTONEG_DISABLE or %AUTONEG_ENABLE
+ * @mdio_support: Bitmask of %ETH_MDIO_SUPPORTS_* flags for the MDIO
+ *	protocols supported by the interface; 0 if unknown.
+ *	Read-only.
+ * @maxtxpkt: Historically used to report TX IRQ coalescing; now
+ *	obsoleted by &struct ethtool_coalesce.  Read-only; deprecated.
+ * @maxrxpkt: Historically used to report RX IRQ coalescing; now
+ *	obsoleted by &struct ethtool_coalesce.  Read-only; deprecated.
+ * @speed_hi: High bits of the speed
+ * @eth_tp_mdix: Ethernet twisted-pair MDI(-X) status; one of
+ *	%ETH_TP_MDI_*.  If the status is unknown or not applicable, the
+ *	value will be %ETH_TP_MDI_INVALID.  Read-only.
+ * @eth_tp_mdix_ctrl: Ethernet twisted pair MDI(-X) control; one of
+ *	%ETH_TP_MDI_*.  If MDI(-X) control is not implemented, reads
+ *	yield %ETH_TP_MDI_INVALID and writes may be ignored or rejected.
+ *	When written successfully, the link should be renegotiated if
+ *	necessary.
+ * @lp_advertising: Bitmask of %ADVERTISED_* flags for the link modes
+ *	and other link features that the link partner advertised
+ *	through autonegotiation; 0 if unknown or not applicable.
+ *	Read-only.
+ *
+ * The link speed in Mbps is split between @speed and @speed_hi.  Use
+ * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
+ * access it.
+ *
+ * If autonegotiation is disabled, the speed and @duplex represent the
+ * fixed link mode and are writable if the driver supports multiple
+ * link modes.  If it is enabled then they are read-only; if the link
+ * is up they represent the negotiated link mode; if the link is down,
+ * the speed is 0, %SPEED_UNKNOWN or the highest enabled speed and
+ * @duplex is %DUPLEX_UNKNOWN or the best enabled duplex mode.
+ *
+ * Some hardware interfaces may have multiple PHYs and/or physical
+ * connectors fitted or do not allow the driver to detect which are
+ * fitted.  For these interfaces @port and/or @phy_address may be
+ * writable, possibly dependent on @autoneg being %AUTONEG_DISABLE.
+ * Otherwise, attempts to write different values may be ignored or
+ * rejected.
+ *
+ * Users should assume that all fields not marked read-only are
+ * writable and subject to validation by the driver.  They should use
+ * %ETHTOOL_GSET to get the current values before making specific
+ * changes and then applying them with %ETHTOOL_SSET.
+ *
+ * Drivers that implement set_settings() should validate all fields
+ * other than @cmd that are not described as read-only or deprecated,
+ * and must ignore all fields described as read-only.
+ *
+ * Deprecated fields should be ignored by both users and drivers.
+ */
 struct ethtool_cmd {
 	__u32	cmd;
-	__u32	supported;	/* Features this interface supports */
-	__u32	advertising;	/* Features this interface advertises */
-	__u16	speed;	        /* The forced speed (lower bits) in
-				 * Mbps. Please use
-				 * ethtool_cmd_speed()/_set() to
-				 * access it */
-	__u8	duplex;		/* Duplex, half or full */
-	__u8	port;		/* Which connector port */
-	__u8	phy_address;	/* MDIO PHY address (PRTAD for clause 45).
-				 * May be read-only or read-write
-				 * depending on the driver.
-				 */
-	__u8	transceiver;	/* Which transceiver to use */
-	__u8	autoneg;	/* Enable or disable autonegotiation */
-	__u8	mdio_support;	/* MDIO protocols supported.  Read-only.
-				 * Not set by all drivers.
-				 */
-	__u32	maxtxpkt;	/* Tx pkts before generating tx int */
-	__u32	maxrxpkt;	/* Rx pkts before generating rx int */
-	__u16	speed_hi;       /* The forced speed (upper
-				 * bits) in Mbps. Please use
-				 * ethtool_cmd_speed()/_set() to
-				 * access it */
-	__u8	eth_tp_mdix;	/* twisted pair MDI-X status */
-	__u8    eth_tp_mdix_ctrl; /* twisted pair MDI-X control, when set,
-				   * link should be renegotiated if necessary
-				   */
-	__u32	lp_advertising;	/* Features the link partner advertises */
+	__u32	supported;
+	__u32	advertising;
+	__u16	speed;
+	__u8	duplex;
+	__u8	port;
+	__u8	phy_address;
+	__u8	transceiver;
+	__u8	autoneg;
+	__u8	mdio_support;
+	__u32	maxtxpkt;
+	__u32	maxrxpkt;
+	__u16	speed_hi;
+	__u8	eth_tp_mdix;
+	__u8	eth_tp_mdix_ctrl;
+	__u32	lp_advertising;
 	__u32	reserved[2];
 };
 
@@ -905,7 +965,6 @@ enum ethtool_sfeatures_retval_bits {
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET
 
-/* Indicates what features are supported by the interface. */
 #define SUPPORTED_10baseT_Half		(1 << 0)
 #define SUPPORTED_10baseT_Full		(1 << 1)
 #define SUPPORTED_100baseT_Half		(1 << 2)
@@ -934,7 +993,6 @@ enum ethtool_sfeatures_retval_bits {
 #define SUPPORTED_40000baseSR4_Full	(1 << 25)
 #define SUPPORTED_40000baseLR4_Full	(1 << 26)
 
-/* Indicates what features are advertised by the interface. */
 #define ADVERTISED_10baseT_Half		(1 << 0)
 #define ADVERTISED_10baseT_Full		(1 << 1)
 #define ADVERTISED_100baseT_Half	(1 << 2)
@@ -999,9 +1057,7 @@ enum ethtool_sfeatures_retval_bits {
 #define XCVR_DUMMY2		0x03
 #define XCVR_DUMMY3		0x04
 
-/* Enable or disable autonegotiation.  If this is set to enable,
- * the forced link modes above are completely ignored.
- */
+/* Enable or disable autonegotiation. */
 #define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
-- 
cgit v1.2.3


From daba1b6bc1cb89c462bdd444c29c023dda3459e6 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:03 +0000
Subject: ethtool: Expand documentation of struct ethtool_drvinfo

Replace the inline comments (and some others below) with a full
explanation of the semantics, in kernel-doc format.  Specify which
strings may be empty.  Document the relationship with other commands.

Replace the 'deprecation' of some fields with a proper explanation of
the conversion to generalised string sets, as userland programs may
not be able to assume that ETHTOOL_GSSET_INFO is available.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 56 ++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 17 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index c4d5eb2bfac5..cc523cb53b07 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -139,28 +139,50 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
 
 #define ETHTOOL_FWVERS_LEN	32
 #define ETHTOOL_BUSINFO_LEN	32
-/* these strings are set to whatever the driver author decides... */
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name.  This should normally match the name
+ *	in its bus driver structure (e.g. pci_driver::name).  Must
+ *	not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @bus_info: Device bus address.  This should match the dev_name()
+ *	string for the underlying bus device, if there is one.  May be
+ *	an empty string.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ *	%ETHTOOL_SPFLAGS commands; also the number of strings in the
+ *	%ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ *	command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ *	command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ *	and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ *	command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation.  The ethtool
+ * core fills in the other fields using other driver operations.
+ */
 struct ethtool_drvinfo {
 	__u32	cmd;
-	char	driver[32];	/* driver short name, "tulip", "eepro100" */
-	char	version[32];	/* driver version string */
-	char	fw_version[ETHTOOL_FWVERS_LEN];	/* firmware version string */
-	char	bus_info[ETHTOOL_BUSINFO_LEN];	/* Bus info for this IF. */
-				/* For PCI devices, use pci_name(pci_dev). */
+	char	driver[32];
+	char	version[32];
+	char	fw_version[ETHTOOL_FWVERS_LEN];
+	char	bus_info[ETHTOOL_BUSINFO_LEN];
 	char	reserved1[32];
 	char	reserved2[12];
-				/*
-				 * Some struct members below are filled in
-				 * using ops->get_sset_count().  Obtaining
-				 * this info from ethtool_drvinfo is now
-				 * deprecated; Use ETHTOOL_GSSET_INFO
-				 * instead.
-				 */
-	__u32	n_priv_flags;	/* number of flags valid in ETHTOOL_GPFLAGS */
-	__u32	n_stats;	/* number of u64's from ETHTOOL_GSTATS */
+	__u32	n_priv_flags;
+	__u32	n_stats;
 	__u32	testinfo_len;
-	__u32	eedump_len;	/* Size of data from ETHTOOL_GEEPROM (bytes) */
-	__u32	regdump_len;	/* Size of data from ETHTOOL_GREGS (bytes) */
+	__u32	eedump_len;
+	__u32	regdump_len;
 };
 
 #define SOPASS_MAX	6
-- 
cgit v1.2.3


From 02d59f3fdb6a5d77a12549482811d404842e2c95 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:07 +0000
Subject: ethtool: Expand documentation of struct ethtool_wol

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cc523cb53b07..15a42de87fec 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -186,12 +186,21 @@ struct ethtool_drvinfo {
 };
 
 #define SOPASS_MAX	6
-/* wake-on-lan settings */
+
+/**
+ * struct ethtool_wolinfo - Wake-On-Lan configuration
+ * @cmd: Command number = %ETHTOOL_GWOL or %ETHTOOL_SWOL
+ * @supported: Bitmask of %WAKE_* flags for supported Wake-On-Lan modes.
+ *	Read-only.
+ * @wolopts: Bitmask of %WAKE_* flags for enabled Wake-On-Lan modes.
+ * @sopass: SecureOn(tm) password; meaningful only if %WAKE_MAGICSECURE
+ *	is set in @wolopts.
+ */
 struct ethtool_wolinfo {
 	__u32	cmd;
 	__u32	supported;
 	__u32	wolopts;
-	__u8	sopass[SOPASS_MAX]; /* SecureOn(tm) password */
+	__u8	sopass[SOPASS_MAX];
 };
 
 /* for passing single values */
-- 
cgit v1.2.3


From 09fb8bb068c86ff6b79dd7d4cb8f706bedf86d73 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:13 +0000
Subject: ethtool: Expand documentation of struct ethtool_regs

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 15a42de87fec..441bd316b850 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -209,11 +209,25 @@ struct ethtool_value {
 	__u32	data;
 };
 
-/* for passing big chunks of data */
+/**
+ * struct ethtool_regs - hardware register dump
+ * @cmd: Command number = %ETHTOOL_GREGS
+ * @version: Dump format version.  This is driver-specific and may
+ *	distinguish different chips/revisions.  Drivers must use new
+ *	version numbers whenever the dump format changes in an
+ *	incompatible way.
+ * @len: On entry, the real length of @data.  On return, the number of
+ *	bytes used.
+ * @data: Buffer for the register dump
+ *
+ * Users should use %ETHTOOL_GDRVINFO to find the maximum length of
+ * a register dump for the interface.  They must allocate the buffer
+ * immediately following this structure.
+ */
 struct ethtool_regs {
 	__u32	cmd;
-	__u32	version; /* driver-specific, indicates different chips/revs */
-	__u32	len; /* bytes */
+	__u32	version;
+	__u32	len;
 	__u8	data[0];
 };
 
-- 
cgit v1.2.3


From c8364a63f6483db0826ccec6e485c1646522faf1 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:17 +0000
Subject: ethtool: Expand documentation of struct ethtool_eeprom

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 441bd316b850..d5a0d20a9ff0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -231,12 +231,29 @@ struct ethtool_regs {
 	__u8	data[0];
 };
 
-/* for passing EEPROM chunks */
+/**
+ * struct ethtool_eeprom - EEPROM dump
+ * @cmd: Command number = %ETHTOOL_GEEPROM, %ETHTOOL_GMODULEEEPROM or
+ *	%ETHTOOL_SEEPROM
+ * @magic: A 'magic cookie' value to guard against accidental changes.
+ *	The value passed in to %ETHTOOL_SEEPROM must match the value
+ *	returned by %ETHTOOL_GEEPROM for the same device.  This is
+ *	unused when @cmd is %ETHTOOL_GMODULEEEPROM.
+ * @offset: Offset within the EEPROM to begin reading/writing, in bytes
+ * @len: On entry, number of bytes to read/write.  On successful
+ *	return, number of bytes actually read/written.  In case of
+ *	error, this may indicate at what point the error occurred.
+ * @data: Buffer to read/write from
+ *
+ * Users may use %ETHTOOL_GDRVINFO or %ETHTOOL_GMODULEINFO to find
+ * the length of an on-board or module EEPROM, respectively.  They
+ * must allocate the buffer immediately following this structure.
+ */
 struct ethtool_eeprom {
 	__u32	cmd;
 	__u32	magic;
-	__u32	offset; /* in bytes */
-	__u32	len; /* in bytes */
+	__u32	offset;
+	__u32	len;
 	__u8	data[0];
 };
 
-- 
cgit v1.2.3


From af440a8aed3d88a39f26b13de26b30c36189d994 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:23 +0000
Subject: ethtool: Expand documentation of struct ethtool_ringparam

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d5a0d20a9ff0..e602cd05cef1 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -401,22 +401,37 @@ struct ethtool_coalesce {
 	__u32	rate_sample_interval;
 };
 
-/* for configuring RX/TX ring parameters */
+/**
+ * struct ethtool_ringparam - RX/TX ring parameters
+ * @cmd: Command number = %ETHTOOL_GRINGPARAM or %ETHTOOL_SRINGPARAM
+ * @rx_max_pending: Maximum supported number of pending entries per
+ *	RX ring.  Read-only.
+ * @rx_mini_max_pending: Maximum supported number of pending entries
+ *	per RX mini ring.  Read-only.
+ * @rx_jumbo_max_pending: Maximum supported number of pending entries
+ *	per RX jumbo ring.  Read-only.
+ * @tx_max_pending: Maximum supported number of pending entries per
+ *	TX ring.  Read-only.
+ * @rx_pending: Current maximum number of pending entries per RX ring
+ * @rx_mini_pending: Current maximum number of pending entries per RX
+ *	mini ring
+ * @rx_jumbo_pending: Current maximum number of pending entries per RX
+ *	jumbo ring
+ * @tx_pending: Current maximum number of pending entries
+ *	per TX ring
+ *
+ * If the interface does not have separate RX mini and/or jumbo rings,
+ * @rx_mini_max_pending and/or @rx_jumbo_max_pending will be 0.
+ *
+ * There may also be driver-dependent minimum values for the number
+ * of entries per ring.
+ */
 struct ethtool_ringparam {
-	__u32	cmd;	/* ETHTOOL_{G,S}RINGPARAM */
-
-	/* Read only attributes.  These indicate the maximum number
-	 * of pending RX/TX ring entries the driver will allow the
-	 * user to set.
-	 */
+	__u32	cmd;
 	__u32	rx_max_pending;
 	__u32	rx_mini_max_pending;
 	__u32	rx_jumbo_max_pending;
 	__u32	tx_max_pending;
-
-	/* Values changeable by the user.  The valid values are
-	 * in the range 1 to the "*_max_pending" counterpart above.
-	 */
 	__u32	rx_pending;
 	__u32	rx_mini_pending;
 	__u32	rx_jumbo_pending;
-- 
cgit v1.2.3


From 6a7a1081cebacc454588a0ba448eda6bd498e709 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:28 +0000
Subject: ethtool: Update documentation of struct ethtool_pauseparam

Convert the inline comments to kernel-doc format.

Explicitly specify that non-zero autoneg is an error if link
autonegotiation is disabled.

Specify that pause capabilities should be advertised dependent on link
autonegotiation, not the autoneg flag here.  There is no way to
opt-out of pause frame autonegotiation, and this improves behaviour
when the link partner is configured to follow pause frame
autonegotiation and our interface is not.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e602cd05cef1..8ca2924a2cbf 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -466,20 +466,30 @@ struct ethtool_channels {
 	__u32	combined_count;
 };
 
-/* for configuring link flow control parameters */
+/**
+ * struct ethtool_pauseparam - Ethernet pause (flow control) parameters
+ * @cmd: Command number = %ETHTOOL_GPAUSEPARAM or %ETHTOOL_SPAUSEPARAM
+ * @autoneg: Flag to enable autonegotiation of pause frame use
+ * @rx_pause: Flag to enable reception of pause frames
+ * @tx_pause: Flag to enable transmission of pause frames
+ *
+ * Drivers should reject a non-zero setting of @autoneg when
+ * autoneogotiation is disabled (or not supported) for the link.
+ *
+ * If the link is autonegotiated, drivers should use
+ * mii_advertise_flowctrl() or similar code to set the advertised
+ * pause frame capabilities based on the @rx_pause and @tx_pause flags,
+ * even if @autoneg is zero.  They should also allow the advertised
+ * pause frame capabilities to be controlled directly through the
+ * advertising field of &struct ethtool_cmd.
+ *
+ * If @autoneg is non-zero, the MAC is configured to send and/or
+ * receive pause frames according to the result of autonegotiation.
+ * Otherwise, it is configured directly based on the @rx_pause and
+ * @tx_pause flags.
+ */
 struct ethtool_pauseparam {
-	__u32	cmd;	/* ETHTOOL_{G,S}PAUSEPARAM */
-
-	/* If the link is being auto-negotiated (via ethtool_cmd.autoneg
-	 * being true) the user may set 'autoneg' here non-zero to have the
-	 * pause parameters be auto-negotiated too.  In such a case, the
-	 * {rx,tx}_pause values below determine what capabilities are
-	 * advertised.
-	 *
-	 * If 'autoneg' is zero or the link is not being auto-negotiated,
-	 * then {rx,tx}_pause force the driver to use/not-use pause
-	 * flow control.
-	 */
+	__u32	cmd;
 	__u32	autoneg;
 	__u32	rx_pause;
 	__u32	tx_pause;
-- 
cgit v1.2.3


From fe5df1b91ec35fb80159874670e6bc81c8851c04 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:33 +0000
Subject: ethtool: Expand documentation of string set types

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 59 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 8ca2924a2cbf..e726340d1f8e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -496,31 +496,66 @@ struct ethtool_pauseparam {
 };
 
 #define ETH_GSTRING_LEN		32
+
+/**
+ * enum ethtool_stringset - string set ID
+ * @ETH_SS_TEST: Self-test result names, for use with %ETHTOOL_TEST
+ * @ETH_SS_STATS: Statistic names, for use with %ETHTOOL_GSTATS
+ * @ETH_SS_PRIV_FLAGS: Driver private flag names, for use with
+ *	%ETHTOOL_GPFLAGS and %ETHTOOL_SPFLAGS
+ * @ETH_SS_NTUPLE_FILTERS: Previously used with %ETHTOOL_GRXNTUPLE;
+ *	now deprecated
+ * @ETH_SS_FEATURES: Device feature names
+ */
 enum ethtool_stringset {
 	ETH_SS_TEST		= 0,
 	ETH_SS_STATS,
 	ETH_SS_PRIV_FLAGS,
-	ETH_SS_NTUPLE_FILTERS,	/* Do not use, GRXNTUPLE is now deprecated */
+	ETH_SS_NTUPLE_FILTERS,
 	ETH_SS_FEATURES,
 };
 
-/* for passing string sets for data tagging */
+/**
+ * struct ethtool_gstrings - string set for data tagging
+ * @cmd: Command number = %ETHTOOL_GSTRINGS
+ * @string_set: String set ID; one of &enum ethtool_stringset
+ * @len: On return, the number of strings in the string set
+ * @data: Buffer for strings.  Each string is null-padded to a size of
+ *	%ETH_GSTRING_LEN.
+ *
+ * Users must use %ETHTOOL_GSSET_INFO to find the number of strings in
+ * the string set.  They must allocate a buffer of the appropriate
+ * size immediately following this structure.
+ */
 struct ethtool_gstrings {
-	__u32	cmd;		/* ETHTOOL_GSTRINGS */
-	__u32	string_set;	/* string set id e.c. ETH_SS_TEST, etc*/
-	__u32	len;		/* number of strings in the string set */
+	__u32	cmd;
+	__u32	string_set;
+	__u32	len;
 	__u8	data[0];
 };
 
+/**
+ * struct ethtool_sset_info - string set information
+ * @cmd: Command number = %ETHTOOL_GSSET_INFO
+ * @sset_mask: On entry, a bitmask of string sets to query, with bits
+ *	numbered according to &enum ethtool_stringset.  On return, a
+ *	bitmask of those string sets queried that are supported.
+ * @data: Buffer for string set sizes.  On return, this contains the
+ *	size of each string set that was queried and supported, in
+ *	order of ID.
+ *
+ * Example: The user passes in @sset_mask = 0x7 (sets 0, 1, 2) and on
+ * return @sset_mask == 0x6 (sets 1, 2).  Then @data[0] contains the
+ * size of set 1 and @data[1] contains the size of set 2.
+ *
+ * Users must allocate a buffer of the appropriate size (4 * number of
+ * sets queried) immediately following this structure.
+ */
 struct ethtool_sset_info {
-	__u32	cmd;		/* ETHTOOL_GSSET_INFO */
+	__u32	cmd;
 	__u32	reserved;
-	__u64	sset_mask;	/* input: each bit selects an sset to query */
-				/* output: each bit a returned sset */
-	__u32	data[0];	/* ETH_SS_xxx count, in order, based on bits
-				   in sset_mask.  One bit implies one
-				   __u32, two bits implies two
-				   __u32's, etc. */
+	__u64	sset_mask;
+	__u32	data[0];
 };
 
 /**
-- 
cgit v1.2.3


From 4e5a62db2bed17b2b6241850f330c820f75bc3cd Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:38 +0000
Subject: ethtool: Expand documentation of struct ethtool_test

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e726340d1f8e..f246f3703ed8 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -575,12 +575,25 @@ enum ethtool_test_flags {
 	ETH_TEST_FL_EXTERNAL_LB_DONE	= (1 << 3),
 };
 
-/* for requesting NIC test and getting results*/
+/**
+ * struct ethtool_test - device self-test invocation
+ * @cmd: Command number = %ETHTOOL_TEST
+ * @flags: A bitmask of flags from &enum ethtool_test_flags.  Some
+ *	flags may be set by the user on entry; others may be set by
+ *	the driver on return.
+ * @len: On return, the number of test results
+ * @data: Array of test results
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of test results that will be returned.  They must allocate a
+ * buffer of the appropriate size (8 * number of results) immediately
+ * following this structure.
+ */
 struct ethtool_test {
-	__u32	cmd;		/* ETHTOOL_TEST */
-	__u32	flags;		/* ETH_TEST_FL_xxx */
+	__u32	cmd;
+	__u32	flags;
 	__u32	reserved;
-	__u32	len;		/* result length, in number of u64 elements */
+	__u32	len;
 	__u64	data[0];
 };
 
-- 
cgit v1.2.3


From 590912298c2d7d9d69c87d93af1f2f8b365f1e04 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:43 +0000
Subject: ethtool: Expand documentation of struct ethtool_stats

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f246f3703ed8..e96aec0bf7c1 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -597,10 +597,20 @@ struct ethtool_test {
 	__u64	data[0];
 };
 
-/* for dumping NIC-specific statistics */
+/**
+ * struct ethtool_stats - device-specific statistics
+ * @cmd: Command number = %ETHTOOL_GSTATS
+ * @n_stats: On return, the number of statistics
+ * @data: Array of statistics
+ *
+ * Users must use %ETHTOOL_GSSET_INFO or %ETHTOOL_GDRVINFO to find the
+ * number of statistics that will be returned.  They must allocate a
+ * buffer of the appropriate size (8 * number of statistics)
+ * immediately following this structure.
+ */
 struct ethtool_stats {
-	__u32	cmd;		/* ETHTOOL_GSTATS */
-	__u32	n_stats;	/* number of u64's being returned */
+	__u32	cmd;
+	__u32	n_stats;
 	__u64	data[0];
 };
 
-- 
cgit v1.2.3


From f432c095f78cd77df049bd0a8322e91ed6d3c59a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:48 +0000
Subject: ethtool: Expand documentation of struct ethtool_perm_addr

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e96aec0bf7c1..79be416f03e6 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -614,8 +614,19 @@ struct ethtool_stats {
 	__u64	data[0];
 };
 
+/**
+ * struct ethtool_perm_addr - permanent hardware address
+ * @cmd: Command number = %ETHTOOL_GPERMADDR
+ * @size: On entry, the size of the buffer.  On return, the size of the
+ *	address.  The command fails if the buffer is too small.
+ * @data: Buffer for the address
+ *
+ * Users must allocate the buffer immediately following this structure.
+ * A buffer size of %MAX_ADDR_LEN should be sufficient for any address
+ * type.
+ */
 struct ethtool_perm_addr {
-	__u32	cmd;		/* ETHTOOL_GPERMADDR */
+	__u32	cmd;
 	__u32	size;
 	__u8	data[0];
 };
-- 
cgit v1.2.3


From ba569dc3e8b9964a91da0cb802d3e46796e1168e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:14:59 +0000
Subject: ethtool: Move kernel-doc comment next to struct ethtool_dump
 definition

The kernel-doc script does not tolerate the macro definition in between.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 79be416f03e6..24418ac78b77 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -920,9 +920,6 @@ struct ethtool_flash {
  * 	 for %ETHTOOL_GET_DUMP_FLAG command
  * @data: data collected for get dump data operation
  */
-
-#define ETH_FW_DUMP_DISABLE 0
-
 struct ethtool_dump {
 	__u32	cmd;
 	__u32	version;
@@ -931,6 +928,8 @@ struct ethtool_dump {
 	__u8	data[0];
 };
 
+#define ETH_FW_DUMP_DISABLE 0
+
 /* for returning and changing feature sets */
 
 /**
-- 
cgit v1.2.3


From 073e3cf21916afa25c1f3e8807a8ebf4faf6f4d5 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 12 Feb 2014 22:15:09 +0000
Subject: ethtool: Fix unwanted section breaks in kernel-doc

A colon almost unavoidably starts a new section.  The script should be
changed to provide a way to avoid this, but for now reword the
comments to avoid using colons.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 24418ac78b77..fd161e91b6d7 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -351,17 +351,18 @@ struct ethtool_modinfo {
  * @rate_sample_interval: How often to do adaptive coalescing packet rate
  *	sampling, measured in seconds.  Must not be zero.
  *
- * Each pair of (usecs, max_frames) fields specifies this exit
- * condition for interrupt coalescing:
+ * Each pair of (usecs, max_frames) fields specifies that interrupts
+ * should be coalesced until
  *	(usecs > 0 && time_since_first_completion >= usecs) ||
  *	(max_frames > 0 && completed_frames >= max_frames)
+ *
  * It is illegal to set both usecs and max_frames to zero as this
  * would cause interrupts to never be generated.  To disable
  * coalescing, set usecs = 0 and max_frames = 1.
  *
  * Some implementations ignore the value of max_frames and use the
- * condition:
- *	time_since_first_completion >= usecs
+ * condition time_since_first_completion >= usecs
+ *
  * This is deprecated.  Drivers for hardware that does not support
  * counting completions should validate that max_frames == !rx_usecs.
  *
@@ -809,7 +810,7 @@ struct ethtool_rx_flow_spec {
  * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused
  * location, and may remove a rule at a later location (lower
  * priority) that matches exactly the same set of flows.  The special
- * values are: %RX_CLS_LOC_ANY, selecting any location;
+ * values are %RX_CLS_LOC_ANY, selecting any location;
  * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum
  * priority); and %RX_CLS_LOC_LAST, selecting the last suitable
  * location (minimum priority).  Additional special values may be
@@ -949,8 +950,9 @@ struct ethtool_get_features_block {
 /**
  * struct ethtool_gfeatures - command to get state of device's features
  * @cmd: command number = %ETHTOOL_GFEATURES
- * @size: in: number of elements in the features[] array;
- *       out: number of elements in features[] needed to hold all features
+ * @size: On entry, the number of elements in the features[] array;
+ *	on return, the number of elements in features[] needed to hold
+ *	all features
  * @features: state of features
  */
 struct ethtool_gfeatures {
-- 
cgit v1.2.3


From e3480a61fca72d40d6dc4baaf37e94fcbfa95e19 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 30 Jan 2014 17:20:24 -0800
Subject: Input: uinput - add UI_GET_SYSNAME ioctl to retrieve the sysfs path

uinput is used in the xorg-integration-tests suite and in the wayland
test suite. These automated test suites create many virtual input
devices and then hook something to read these newly created devices.

Currently, uinput does not provide the created input device, which means
that we rely on a heuristic to guess which input node was created.
The problem is that this heuristic is subject to races between different
uinput devices or even with physical devices. Having a way to retrieve
the sysfs path allows us to find the event node without any doubt.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/uinput.h      |  2 ++
 include/uapi/linux/uinput.h | 13 ++++++++++++-
 3 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index d8ae08d12abf..856936247500 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -20,6 +20,8 @@
  * Author: Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *
  * Changes/Revisions:
+ *	0.4	01/09/2014 (Benjamin Tissoires <benjamin.tissoires@redhat.com>)
+ *		- add UI_GET_SYSNAME ioctl
  *	0.3	09/04/2006 (Anssi Hannula <anssi.hannula@gmail.com>)
  *		- updated ff support for the changes in kernel interface
  *		- added MODULE_VERSION
@@ -670,6 +672,31 @@ static int uinput_ff_upload_from_user(const char __user *buffer,
 	__ret;						\
 })
 
+static int uinput_str_to_user(void __user *dest, const char *str,
+			      unsigned int maxlen)
+{
+	char __user *p = dest;
+	int len, ret;
+
+	if (!str)
+		return -ENOENT;
+
+	if (maxlen == 0)
+		return -EINVAL;
+
+	len = strlen(str) + 1;
+	if (len > maxlen)
+		len = maxlen;
+
+	ret = copy_to_user(p, str, len);
+	if (ret)
+		return -EFAULT;
+
+	/* force terminating '\0' */
+	ret = put_user(0, p + len - 1);
+	return ret ? -EFAULT : len;
+}
+
 static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 				 unsigned long arg, void __user *p)
 {
@@ -679,6 +706,8 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 	struct uinput_ff_erase  ff_erase;
 	struct uinput_request   *req;
 	char			*phys;
+	const char		*name;
+	unsigned int		size;
 
 	retval = mutex_lock_interruptible(&udev->mutex);
 	if (retval)
@@ -831,6 +860,20 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 			goto out;
 	}
 
+	size = _IOC_SIZE(cmd);
+
+	/* Now check variable-length commands */
+	switch (cmd & ~IOCSIZE_MASK) {
+	case UI_GET_SYSNAME(0):
+		if (udev->state != UIST_CREATED) {
+			retval = -ENOENT;
+			goto out;
+		}
+		name = dev_name(&udev->dev->dev);
+		retval = uinput_str_to_user(p, name, size);
+		goto out;
+	}
+
 	retval = -EINVAL;
  out:
 	mutex_unlock(&udev->mutex);
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index 0a4487d3fb1e..0994c0d01a09 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -20,6 +20,8 @@
  * Author: Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *
  * Changes/Revisions:
+ *	0.4	01/09/2014 (Benjamin Tissoires <benjamin.tissoires@redhat.com>)
+ *		- add UI_GET_SYSNAME ioctl
  *	0.3	24/05/2006 (Anssi Hannula <anssi.hannulagmail.com>)
  *		- update ff support for the changes in kernel interface
  *		- add UINPUT_VERSION
diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h
index fe46431593f9..0389b489bbba 100644
--- a/include/uapi/linux/uinput.h
+++ b/include/uapi/linux/uinput.h
@@ -20,6 +20,8 @@
  * Author: Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
  *
  * Changes/Revisions:
+ *	0.4	01/09/2014 (Benjamin Tissoires <benjamin.tissoires@redhat.com>)
+ *		- add UI_GET_SYSNAME ioctl
  *	0.3	24/05/2006 (Anssi Hannula <anssi.hannulagmail.com>)
  *		- update ff support for the changes in kernel interface
  *		- add UINPUT_VERSION
@@ -35,7 +37,7 @@
 #include <linux/types.h>
 #include <linux/input.h>
 
-#define UINPUT_VERSION		3
+#define UINPUT_VERSION		4
 
 
 struct uinput_ff_upload {
@@ -73,6 +75,15 @@ struct uinput_ff_erase {
 #define UI_BEGIN_FF_ERASE	_IOWR(UINPUT_IOCTL_BASE, 202, struct uinput_ff_erase)
 #define UI_END_FF_ERASE		_IOW(UINPUT_IOCTL_BASE, 203, struct uinput_ff_erase)
 
+/**
+ * UI_GET_SYSNAME - get the sysfs name of the created uinput device
+ *
+ * @return the sysfs name of the created virtual input device.
+ * The complete sysfs path is then /sys/devices/virtual/input/--NAME--
+ * Usually, it is in the form "inputN"
+ */
+#define UI_GET_SYSNAME(len)	_IOC(_IOC_READ, UINPUT_IOCTL_BASE, 300, len)
+
 /*
  * To write a force-feedback-capable driver, the upload_effect
  * and erase_effect callbacks in input_dev must be implemented.
-- 
cgit v1.2.3


From 977cb0ecf82eb6d15562573c31edebf90db35163 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 13 Feb 2014 14:27:40 -0800
Subject: tcp: add pacing_rate information into tcp_info

Add two new fields to struct tcp_info, to report sk_pacing_rate
and sk_max_pacing_rate to monitoring applications, such as ss from iproute2.

The user-exported fields are 64-bit, even though the kernel is currently
using 32-bit fields.

lpaa5:~# ss -i
..
	 skmem:(r0,rb357120,t0,tb2097152,f1584,w1980880,o0,bl0) ts sack cubic
wscale:6,6 rto:400 rtt:0.875/0.75 mss:1448 cwnd:1 ssthresh:12 send
13.2Mbps pacing_rate 3336.2Mbps unacked:15 retrans:1/5448 lost:15
rcv_space:29200

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 3 +++
 net/ipv4/tcp.c           | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 377f1e59411d..3b9718328d8b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -186,6 +186,9 @@ struct tcp_info {
 	__u32	tcpi_rcv_space;
 
 	__u32	tcpi_total_retrans;
+
+	__u64	tcpi_pacing_rate;
+	__u64	tcpi_max_pacing_rate;
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9f3a2db9109e..bed379c7abcd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2794,6 +2794,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 	info->tcpi_rcv_space = tp->rcvq_space.space;
 
 	info->tcpi_total_retrans = tp->total_retrans;
+
+	info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
+					sk->sk_pacing_rate : ~0ULL;
+	info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
+					sk->sk_max_pacing_rate : ~0ULL;
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
-- 
cgit v1.2.3


From e487e4dc2eb227c52fc71eae683181fa917163b8 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@animalcreek.com>
Date: Tue, 14 Jan 2014 17:52:09 -0700
Subject: NFC: Add ISO/IEC 15693 header definitions

Add the header definitions required by upcoming
patches that add support for ISO/IEC 15693.

Signed-off-by: Mark A. Greer <mgreer@animalcreek.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/digital.h | 4 ++++
 include/net/nfc/nfc.h     | 3 +++
 include/uapi/linux/nfc.h  | 9 ++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h
index 81af21e9bcd4..1f0528d33500 100644
--- a/include/net/nfc/digital.h
+++ b/include/net/nfc/digital.h
@@ -35,6 +35,7 @@ enum {
 	NFC_DIGITAL_RF_TECH_106A = 0,
 	NFC_DIGITAL_RF_TECH_212F,
 	NFC_DIGITAL_RF_TECH_424F,
+	NFC_DIGITAL_RF_TECH_ISO15693,
 
 	NFC_DIGITAL_RF_TECH_LAST,
 };
@@ -57,6 +58,9 @@ enum {
 	NFC_DIGITAL_FRAMING_NFCF_NFC_DEP,
 	NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED,
 
+	NFC_DIGITAL_FRAMING_ISO15693_INVENTORY,
+	NFC_DIGITAL_FRAMING_ISO15693_TVT,	/* Type V Tag (ISO/IEC 15693) */
+
 	NFC_DIGITAL_FRAMING_LAST,
 };
 
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index e80894bca1d0..2e8b40c16274 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -111,6 +111,9 @@ struct nfc_target {
 	u8 sensf_res[NFC_SENSF_RES_MAXSIZE];
 	u8 hci_reader_gate;
 	u8 logical_idx;
+	u8 is_iso15693;
+	u8 iso15693_dsfid;
+	u8 iso15693_uid[NFC_ISO15693_UID_MAXSIZE];
 };
 
 /**
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 6ad6cc03ccd3..9789dc95b6a8 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -150,6 +150,8 @@ enum nfc_commands {
  * @NFC_ATTR_SE_TYPE: Secure element type (UICC or EMBEDDED)
  * @NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS: Firmware download operation status
  * @NFC_ATTR_APDU: Secure element APDU
+ * @NFC_ATTR_TARGET_ISO15693_DSFID: ISO 15693 Data Storage Format Identifier
+ * @NFC_ATTR_TARGET_ISO15693_UID: ISO 15693 Unique Identifier
  */
 enum nfc_attrs {
 	NFC_ATTR_UNSPEC,
@@ -178,6 +180,8 @@ enum nfc_attrs {
 	NFC_ATTR_SE_AID,
 	NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS,
 	NFC_ATTR_SE_APDU,
+	NFC_ATTR_TARGET_ISO15693_DSFID,
+	NFC_ATTR_TARGET_ISO15693_UID,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
@@ -200,6 +204,7 @@ enum nfc_sdp_attr {
 #define NFC_SENSF_RES_MAXSIZE 18
 #define NFC_GB_MAXSIZE        48
 #define NFC_FIRMWARE_NAME_MAXSIZE 32
+#define NFC_ISO15693_UID_MAXSIZE 8
 
 /* NFC protocols */
 #define NFC_PROTO_JEWEL		1
@@ -208,8 +213,9 @@ enum nfc_sdp_attr {
 #define NFC_PROTO_ISO14443	4
 #define NFC_PROTO_NFC_DEP	5
 #define NFC_PROTO_ISO14443_B	6
+#define NFC_PROTO_ISO15693	7
 
-#define NFC_PROTO_MAX		7
+#define NFC_PROTO_MAX		8
 
 /* NFC communication modes */
 #define NFC_COMM_ACTIVE  0
@@ -227,6 +233,7 @@ enum nfc_sdp_attr {
 #define NFC_PROTO_ISO14443_MASK	  (1 << NFC_PROTO_ISO14443)
 #define NFC_PROTO_NFC_DEP_MASK	  (1 << NFC_PROTO_NFC_DEP)
 #define NFC_PROTO_ISO14443_B_MASK (1 << NFC_PROTO_ISO14443_B)
+#define NFC_PROTO_ISO15693_MASK	  (1 << NFC_PROTO_ISO15693)
 
 /* NFC Secure Elements */
 #define NFC_SE_UICC     0x1
-- 
cgit v1.2.3


From d3623099d3509fa68fa28235366049dd3156c63a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 14 Feb 2014 15:30:36 +0100
Subject: ipsec: add support of limited SA dump

The goal of this patch is to allow userland to dump only a subset of the
SAs by specifying a filter during the dump.
The kernel is in charge of filtering the SAs; this avoids generating useless
netlink traffic (and also saves some CPU cycles). This is particularly useful
when a large number of SAs are configured on the system.

Note that I removed the union in struct xfrm_state_walk to fix a problem on arm.
struct netlink_callback->args is defined as an array of 6 longs and the first
long is used in xfrm code to flag the cb as initialized. Hence, we must have:
sizeof(struct xfrm_state_walk) <= sizeof(long) * 5.
With the union, this was false on arm (sizeof(struct xfrm_state_walk) was
sizeof(long) * 7), due to the padding.
In fact, whatever the arch is, this union seems useless: there will always be
padding after it. Removing it will not increase the size of this struct (and
will reduce it on arm).

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h           | 10 +++++-----
 include/uapi/linux/pfkeyv2.h | 15 ++++++++++++++-
 include/uapi/linux/xfrm.h    | 10 ++++++++++
 net/key/af_key.c             | 19 ++++++++++++++++++-
 net/xfrm/xfrm_state.c        | 25 ++++++++++++++++++++++++-
 net/xfrm/xfrm_user.c         | 28 +++++++++++++++++++++++++++-
 6 files changed, 98 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5313ccfdeedf..45332acac022 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -118,11 +118,10 @@
 struct xfrm_state_walk {
 	struct list_head	all;
 	u8			state;
-	union {
-		u8		dying;
-		u8		proto;
-	};
+	u8			dying;
+	u8			proto;
 	u32			seq;
+	struct xfrm_filter	*filter;
 };
 
 /* Full description of state of transformer. */
@@ -1406,7 +1405,8 @@ static inline void xfrm_sysctl_fini(struct net *net)
 }
 #endif
 
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+			  struct xfrm_filter *filter);
 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*), void *);
 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
diff --git a/include/uapi/linux/pfkeyv2.h b/include/uapi/linux/pfkeyv2.h
index 0b80c806631f..ada7f0171ccc 100644
--- a/include/uapi/linux/pfkeyv2.h
+++ b/include/uapi/linux/pfkeyv2.h
@@ -235,6 +235,18 @@ struct sadb_x_kmaddress {
 } __attribute__((packed));
 /* sizeof(struct sadb_x_kmaddress) == 8 */
 
+/* To specify the SA dump filter */
+struct sadb_x_filter {
+	__u16	sadb_x_filter_len;
+	__u16	sadb_x_filter_exttype;
+	__u32	sadb_x_filter_saddr[4];
+	__u32	sadb_x_filter_daddr[4];
+	__u16	sadb_x_filter_family;
+	__u8	sadb_x_filter_splen;
+	__u8	sadb_x_filter_dplen;
+} __attribute__((packed));
+/* sizeof(struct sadb_x_filter) == 40 */
+
 /* Message types */
 #define SADB_RESERVED		0
 #define SADB_GETSPI		1
@@ -358,7 +370,8 @@ struct sadb_x_kmaddress {
 #define SADB_X_EXT_SEC_CTX		24
 /* Used with MIGRATE to pass @ to IKE for negotiation */
 #define SADB_X_EXT_KMADDRESS		25
-#define SADB_EXT_MAX			25
+#define SADB_X_EXT_FILTER		26
+#define SADB_EXT_MAX			26
 
 /* Identity Extension values */
 #define SADB_IDENTTYPE_RESERVED	0
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index a8cd6a4a2970..6550c679584f 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -298,6 +298,8 @@ enum xfrm_attr_type_t {
 	XFRMA_TFCPAD,		/* __u32 */
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
 	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
+	XFRMA_PROTO,		/* __u8 */
+	XFRMA_FILTER,		/* struct xfrm_filter */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -474,6 +476,14 @@ struct xfrm_user_mapping {
 	__be16				new_sport;
 };
 
+struct xfrm_filter {
+	xfrm_address_t			saddr;
+	xfrm_address_t			daddr;
+	__u16				family;
+	__u8				splen;
+	__u8				dplen;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE		1
diff --git a/net/key/af_key.c b/net/key/af_key.c
index e1c69d024197..f0879c19f452 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1798,6 +1798,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
 static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	u8 proto;
+	struct xfrm_filter *filter = NULL;
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
 	if (pfk->dump.dump != NULL)
@@ -1807,11 +1808,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
 	if (proto == 0)
 		return -EINVAL;
 
+	if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
+		struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
+
+		filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+		if (filter == NULL)
+			return -ENOMEM;
+
+		memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
+		       sizeof(xfrm_address_t));
+		memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr,
+		       sizeof(xfrm_address_t));
+		filter->family = xfilter->sadb_x_filter_family;
+		filter->splen = xfilter->sadb_x_filter_splen;
+		filter->dplen = xfilter->sadb_x_filter_dplen;
+	}
+
 	pfk->dump.msg_version = hdr->sadb_msg_version;
 	pfk->dump.msg_portid = hdr->sadb_msg_pid;
 	pfk->dump.dump = pfkey_dump_sa;
 	pfk->dump.done = pfkey_dump_sa_done;
-	xfrm_state_walk_init(&pfk->dump.u.state, proto);
+	xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
 
 	return pfkey_do_dump(pfk);
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0bf12f665b9b..a750901ac3db 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1603,6 +1603,23 @@ unlock:
 }
 EXPORT_SYMBOL(xfrm_alloc_spi);
 
+static bool __xfrm_state_filter_match(struct xfrm_state *x,
+				      struct xfrm_filter *filter)
+{
+	if (filter) {
+		if ((filter->family == AF_INET ||
+		     filter->family == AF_INET6) &&
+		    x->props.family != filter->family)
+			return false;
+
+		return addr_match(&x->props.saddr, &filter->saddr,
+				  filter->splen) &&
+		       addr_match(&x->id.daddr, &filter->daddr,
+				  filter->dplen);
+	}
+	return true;
+}
+
 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*),
 		    void *data)
@@ -1625,6 +1642,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		state = container_of(x, struct xfrm_state, km);
 		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
 			continue;
+		if (!__xfrm_state_filter_match(state, walk->filter))
+			continue;
 		err = func(state, walk->seq, data);
 		if (err) {
 			list_move_tail(&walk->all, &x->all);
@@ -1643,17 +1662,21 @@ out:
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
-void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
+			  struct xfrm_filter *filter)
 {
 	INIT_LIST_HEAD(&walk->all);
 	walk->proto = proto;
 	walk->state = XFRM_STATE_DEAD;
 	walk->seq = 0;
+	walk->filter = filter;
 }
 EXPORT_SYMBOL(xfrm_state_walk_init);
 
 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
 {
+	kfree(walk->filter);
+
 	if (list_empty(&walk->all))
 		return;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d7694f258294..023e5e7ea4c6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -887,6 +887,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
 	return 0;
 }
 
+static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
 static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -902,8 +903,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 	info.nlmsg_flags = NLM_F_MULTI;
 
 	if (!cb->args[0]) {
+		struct nlattr *attrs[XFRMA_MAX+1];
+		struct xfrm_filter *filter = NULL;
+		u8 proto = 0;
+		int err;
+
 		cb->args[0] = 1;
-		xfrm_state_walk_init(walk, 0);
+
+		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
+				  xfrma_policy);
+		if (err < 0)
+			return err;
+
+		if (attrs[XFRMA_FILTER]) {
+			filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+			if (filter == NULL)
+				return -ENOMEM;
+
+			memcpy(filter, nla_data(attrs[XFRMA_FILTER]),
+			       sizeof(*filter));
+		}
+
+		if (attrs[XFRMA_PROTO])
+			proto = nla_get_u8(attrs[XFRMA_PROTO]);
+
+		xfrm_state_walk_init(walk, proto, filter);
 	}
 
 	(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -2309,6 +2333,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
 	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
+	[XFRMA_PROTO]		= { .type = NLA_U8 },
+	[XFRMA_FILTER]		= { .len = sizeof(struct xfrm_filter) },
 };
 
 static const struct xfrm_link {
-- 
cgit v1.2.3


From 01325476d6e46185031be4a9bc6443832dbc807c Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 16 Feb 2014 11:34:30 -0800
Subject: Drivers: hv: Implement the file copy service

Implement the file copy service for Linux guests on Hyper-V. This permits the
host to copy a file (over VMBUS) into the guest. This facility is part of
"guest integration services" supported on the Windows platform.
Here is a link that provides additional details on this functionality:

http://technet.microsoft.com/en-us/library/dn464282.aspx

In V1 version of the patch I have addressed comments from
Olaf Hering <olaf@aepfle.de> and Dan Carpenter <dan.carpenter@oracle.com>

In V2 version of this patch I did some minor cleanup (making some globals
static). In V4 version of the patch I have addressed all of Olaf's
most recent set of comments/concerns.

In V5 version of the patch I had addressed Greg's most recent comments.
I would like to thank Greg for suggesting that I use misc device; it has
significantly simplified the code.

In V6 version of the patch I have cleaned up error message based on Olaf's
comments. I have also rebased the patch based on the current tip.

In this version of the patch, I have addressed the latest comments from Greg.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/Makefile         |   2 +-
 drivers/hv/hv_fcopy.c       | 414 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/hv/hv_util.c        |  11 ++
 drivers/hv/hyperv_vmbus.h   |   4 +
 include/linux/hyperv.h      |  12 +-
 include/uapi/linux/hyperv.h |  46 +++++
 tools/hv/hv_fcopy_daemon.c  | 195 +++++++++++++++++++++
 7 files changed, 682 insertions(+), 2 deletions(-)
 create mode 100644 drivers/hv/hv_fcopy.c
 create mode 100644 tools/hv/hv_fcopy_daemon.c

(limited to 'include/uapi/linux')

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 0a74b5661186..5e4dfa4cfe22 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -5,4 +5,4 @@ obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
 hv_vmbus-y := vmbus_drv.o \
 		 hv.o connection.o channel.o \
 		 channel_mgmt.o ring_buffer.o
-hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o
+hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
new file mode 100644
index 000000000000..285b5e210a09
--- /dev/null
+++ b/drivers/hv/hv_fcopy.c
@@ -0,0 +1,414 @@
+/*
+ * An implementation of file copy service.
+ *
+ * Copyright (C) 2014, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/nls.h>
+#include <linux/workqueue.h>
+#include <linux/cdev.h>
+#include <linux/hyperv.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+
+#include "hyperv_vmbus.h"
+
+#define WIN8_SRV_MAJOR		1
+#define WIN8_SRV_MINOR		1
+#define WIN8_SRV_VERSION	(WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR)
+
+/*
+ * Global state maintained for transaction that is being processed.
+ * For a class of integration services, including the "file copy service",
+ * the specified protocol is a "request/response" protocol which means that
+ * there can only be single outstanding transaction from the host at any
+ * given point in time. We use this to simplify memory management in this
+ * driver - we cache and process only one message at a time.
+ *
+ * While the request/response protocol is guaranteed by the host, we further
+ * ensure this by serializing packet processing in this driver - we do not
+ * read additional packets from the VMBUS until the current packet is fully
+ * handled.
+ *
+ * The transaction "active" state is set when we receive a request from the
+ * host and we cleanup this state when the transaction is completed - when we
+ * respond to the host with our response. When the transaction active state is
+ * set, we defer handling incoming packets.
+ */
+
+static struct {
+	bool active; /* transaction status - active or not */
+	int recv_len; /* number of bytes received. */
+	struct hv_fcopy_hdr  *fcopy_msg; /* current message */
+	struct hv_start_fcopy  message; /*  sent to daemon */
+	struct vmbus_channel *recv_channel; /* chn we got the request */
+	u64 recv_req_id; /* request ID. */
+	void *fcopy_context; /* for the channel callback */
+	struct semaphore read_sema;
+} fcopy_transaction;
+
+static bool opened; /* currently device opened */
+
+/*
+ * Before we can accept copy messages from the host, we need
+ * to handshake with the user level daemon. This state tracks
+ * if we are in the handshake phase.
+ */
+static bool in_hand_shake = true;
+static void fcopy_send_data(void);
+static void fcopy_respond_to_host(int error);
+static void fcopy_work_func(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(fcopy_work, fcopy_work_func);
+static u8 *recv_buffer;
+
+static void fcopy_work_func(struct work_struct *dummy)
+{
+	/*
+	 * If the timer fires, the user-mode component has not responded;
+	 * process the pending transaction.
+	 */
+	fcopy_respond_to_host(HV_E_FAIL);
+}
+
+static int fcopy_handle_handshake(u32 version)
+{
+	switch (version) {
+	case FCOPY_CURRENT_VERSION:
+		break;
+	default:
+		/*
+		 * For now we will fail the registration.
+		 * If and when we have multiple versions to
+		 * deal with, we will be backward compatible.
+		 * We will add this code when needed.
+		 */
+		return -EINVAL;
+	}
+	pr_info("FCP: user-mode registering done. Daemon version: %d\n",
+		version);
+	fcopy_transaction.active = false;
+	if (fcopy_transaction.fcopy_context)
+		hv_fcopy_onchannelcallback(fcopy_transaction.fcopy_context);
+	in_hand_shake = false;
+	return 0;
+}
+
+static void fcopy_send_data(void)
+{
+	struct hv_start_fcopy *smsg_out = &fcopy_transaction.message;
+	int operation = fcopy_transaction.fcopy_msg->operation;
+	struct hv_start_fcopy *smsg_in;
+
+	/*
+	 * The strings sent from the host are encoded in
+	 * utf16; convert them to utf8 strings.
+	 * The host assures us that the utf16 strings will not exceed
+	 * the max lengths specified. We will, however, reserve room
+	 * for the string terminating character - in the utf16s_to_utf8s()
+	 * calls we limit the size of the buffer where the converted
+	 * string is placed to W_MAX_PATH - 1 to guarantee
+	 * that the strings can be properly terminated!
+	 */
+
+	switch (operation) {
+	case START_FILE_COPY:
+		memset(smsg_out, 0, sizeof(struct hv_start_fcopy));
+		smsg_out->hdr.operation = operation;
+		smsg_in = (struct hv_start_fcopy *)fcopy_transaction.fcopy_msg;
+
+		utf16s_to_utf8s((wchar_t *)smsg_in->file_name, W_MAX_PATH,
+				UTF16_LITTLE_ENDIAN,
+				(__u8 *)smsg_out->file_name, W_MAX_PATH - 1);
+
+		utf16s_to_utf8s((wchar_t *)smsg_in->path_name, W_MAX_PATH,
+				UTF16_LITTLE_ENDIAN,
+				(__u8 *)smsg_out->path_name, W_MAX_PATH - 1);
+
+		smsg_out->copy_flags = smsg_in->copy_flags;
+		smsg_out->file_size = smsg_in->file_size;
+		break;
+
+	default:
+		break;
+	}
+	up(&fcopy_transaction.read_sema);
+	return;
+}
+
+/*
+ * Send a response back to the host.
+ */
+
+static void
+fcopy_respond_to_host(int error)
+{
+	struct icmsg_hdr *icmsghdr;
+	u32 buf_len;
+	struct vmbus_channel *channel;
+	u64 req_id;
+
+	/*
+	 * Copy the global state for completing the transaction. Note that
+	 * only one transaction can be active at a time. This is guaranteed
+	 * by the file copy protocol implemented by the host. Furthermore,
+	 * the "transaction active" state we maintain ensures that there can
+	 * only be one active transaction at a time.
+	 */
+
+	buf_len = fcopy_transaction.recv_len;
+	channel = fcopy_transaction.recv_channel;
+	req_id = fcopy_transaction.recv_req_id;
+
+	fcopy_transaction.active = false;
+
+	icmsghdr = (struct icmsg_hdr *)
+			&recv_buffer[sizeof(struct vmbuspipe_hdr)];
+
+	if (channel->onchannel_callback == NULL)
+		/*
+		 * We have raced with util driver being unloaded;
+		 * silently return.
+		 */
+		return;
+
+	icmsghdr->status = error;
+	icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE;
+	vmbus_sendpacket(channel, recv_buffer, buf_len, req_id,
+				VM_PKT_DATA_INBAND, 0);
+}
+
+void hv_fcopy_onchannelcallback(void *context)
+{
+	struct vmbus_channel *channel = context;
+	u32 recvlen;
+	u64 requestid;
+	struct hv_fcopy_hdr *fcopy_msg;
+	struct icmsg_hdr *icmsghdr;
+	struct icmsg_negotiate *negop = NULL;
+	int util_fw_version;
+	int fcopy_srv_version;
+
+	if (fcopy_transaction.active) {
+		/*
+		 * Defer processing this callback until the
+		 * current transaction is complete.
+		 */
+		fcopy_transaction.fcopy_context = context;
+		return;
+	}
+
+	vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen,
+			 &requestid);
+	if (recvlen <= 0)
+		return;
+
+	icmsghdr = (struct icmsg_hdr *)&recv_buffer[
+			sizeof(struct vmbuspipe_hdr)];
+	if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) {
+		util_fw_version = UTIL_FW_VERSION;
+		fcopy_srv_version = WIN8_SRV_VERSION;
+		vmbus_prep_negotiate_resp(icmsghdr, negop, recv_buffer,
+				util_fw_version, fcopy_srv_version);
+	} else {
+		fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[
+				sizeof(struct vmbuspipe_hdr) +
+				sizeof(struct icmsg_hdr)];
+
+		/*
+		 * Stash away this global state for completing the
+		 * transaction; note transactions are serialized.
+		 */
+
+		fcopy_transaction.active = true;
+		fcopy_transaction.recv_len = recvlen;
+		fcopy_transaction.recv_channel = channel;
+		fcopy_transaction.recv_req_id = requestid;
+		fcopy_transaction.fcopy_msg = fcopy_msg;
+
+		/*
+		 * Wake the user-level daemon and arm the response timeout.
+		 */
+		fcopy_send_data();
+		schedule_delayed_work(&fcopy_work, 5*HZ);
+		return;
+	}
+	icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE;
+	vmbus_sendpacket(channel, recv_buffer, recvlen, requestid,
+			VM_PKT_DATA_INBAND, 0);
+}
+
+/*
+ * Create a char device that can support read/write for passing
+ * the payload.
+ */
+
+static ssize_t fcopy_read(struct file *file, char __user *buf,
+		size_t count, loff_t *ppos)
+{
+	void *src;
+	size_t copy_size;
+	int operation;
+
+	/*
+	 * Wait until a message is queued or the device goes away.
+	 */
+	if (down_interruptible(&fcopy_transaction.read_sema))
+		return -EINTR;
+
+	/*
+	 * The channel may be rescinded; in that case fcopy_dev_deinit() wakes
+	 * up the thread blocked on the semaphore, and we use the opened
+	 * state to correctly handle this case.
+	 */
+	if (!opened)
+		return -ENODEV;
+
+	operation = fcopy_transaction.fcopy_msg->operation;
+
+	if (operation == START_FILE_COPY) {
+		src = &fcopy_transaction.message;
+		copy_size = sizeof(struct hv_start_fcopy);
+		if (count < copy_size)
+			return 0;
+	} else {
+		src = fcopy_transaction.fcopy_msg;
+		copy_size = sizeof(struct hv_do_fcopy);
+		if (count < copy_size)
+			return 0;
+	}
+	if (copy_to_user(buf, src, copy_size))
+		return -EFAULT;
+
+	return copy_size;
+}
+
+static ssize_t fcopy_write(struct file *file, const char __user *buf,
+			size_t count, loff_t *ppos)
+{
+	int response = 0;
+
+	if (count != sizeof(int))
+		return -EINVAL;
+
+	if (copy_from_user(&response, buf, sizeof(int)))
+		return -EFAULT;
+
+	if (in_hand_shake) {
+		if (fcopy_handle_handshake(response))
+			return -EINVAL;
+		return sizeof(int);
+	}
+
+	/*
+	 * Complete the transaction by forwarding the result
+	 * to the host. But first, cancel the timeout.
+	 */
+	if (cancel_delayed_work_sync(&fcopy_work))
+		fcopy_respond_to_host(response);
+
+	return sizeof(int);
+}
+
+static int fcopy_open(struct inode *inode, struct file *f)
+{
+	/*
+	 * The user level daemon that opens this device is really an
+	 * extension of this driver, so only one active open is allowed
+	 * at a time; a second open fails with -EBUSY.
+	 */
+	if (opened)
+		return -EBUSY;
+
+	/*
+	 * The daemon is alive; setup the state.
+	 */
+	opened = true;
+	return 0;
+}
+
+static int fcopy_release(struct inode *inode, struct file *f)
+{
+	/*
+	 * The daemon has exited; require a new handshake and allow re-open.
+	 */
+	in_hand_shake = true;
+	opened = false;
+	return 0;
+}
+
+
+static const struct file_operations fcopy_fops = {
+	.read           = fcopy_read,
+	.write          = fcopy_write,
+	.release	= fcopy_release,
+	.open		= fcopy_open,
+};
+
+static struct miscdevice fcopy_misc = {
+	.minor          = MISC_DYNAMIC_MINOR,
+	.name           = "vmbus/hv_fcopy",
+	.fops           = &fcopy_fops,
+};
+
+static int fcopy_dev_init(void)
+{
+	return misc_register(&fcopy_misc);
+}
+
+static void fcopy_dev_deinit(void)
+{
+
+	/*
+	 * The device is going away - perhaps because the
+	 * host has rescinded the channel. Setup state so that
+	 * user level daemon can gracefully exit if it is blocked
+	 * on the read semaphore.
+	 */
+	opened = false;
+	/*
+	 * Signal the semaphore as the device is
+	 * going away.
+	 */
+	up(&fcopy_transaction.read_sema);
+	misc_deregister(&fcopy_misc);
+}
+
+int hv_fcopy_init(struct hv_util_service *srv)
+{
+	recv_buffer = srv->recv_buffer;
+
+	/*
+	 * When this driver loads, the user level daemon that
+	 * processes the host requests may not yet be running.
+	 * Defer processing channel callbacks until the daemon
+	 * has registered.
+	 */
+	fcopy_transaction.active = true;
+	sema_init(&fcopy_transaction.read_sema, 0);
+
+	return fcopy_dev_init();
+}
+
+void hv_fcopy_deinit(void)
+{
+	cancel_delayed_work_sync(&fcopy_work);
+	fcopy_dev_deinit();
+}
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 62dfd246b948..dd761806f0e8 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -28,6 +28,7 @@
 #include <linux/reboot.h>
 #include <linux/hyperv.h>
 
+#include "hyperv_vmbus.h"
 
 #define SD_MAJOR	3
 #define SD_MINOR	0
@@ -82,6 +83,12 @@ static struct hv_util_service util_vss = {
 	.util_deinit = hv_vss_deinit,
 };
 
+static struct hv_util_service util_fcopy = {
+	.util_cb = hv_fcopy_onchannelcallback,
+	.util_init = hv_fcopy_init,
+	.util_deinit = hv_fcopy_deinit,
+};
+
 static void perform_shutdown(struct work_struct *dummy)
 {
 	orderly_poweroff(true);
@@ -401,6 +408,10 @@ static const struct hv_vmbus_device_id id_table[] = {
 	{ HV_VSS_GUID,
 	  .driver_data = (unsigned long)&util_vss
 	},
+	/* File copy GUID */
+	{ HV_FCOPY_GUID,
+	  .driver_data = (unsigned long)&util_fcopy
+	},
 	{ },
 };
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 1544609881fe..860134da8039 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -669,5 +669,9 @@ int vmbus_set_event(struct vmbus_channel *channel);
 
 void vmbus_on_event(unsigned long data);
 
+int hv_fcopy_init(struct hv_util_service *);
+void hv_fcopy_deinit(void);
+void hv_fcopy_onchannelcallback(void *);
+
 
 #endif /* _HYPERV_VMBUS_H */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index fb66fba368d7..62ea1a06836a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -30,7 +30,6 @@
 #include <linux/types.h>
 #include <linux/scatterlist.h>
 #include <linux/list.h>
-#include <linux/uuid.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
@@ -1049,6 +1048,17 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver);
 			0xB7, 0x6B, 0x6F, 0xD0, 0xBE, 0x52, 0x8C, 0xDA \
 		}
 
+/*
+ * Guest File Copy Service
+ * {34D14BE3-DEE4-41c8-9AE7-6B174977C192}
+ */
+
+#define HV_FCOPY_GUID \
+	.guid = { \
+			0xE3, 0x4B, 0xD1, 0x34, 0xE4, 0xDE, 0xC8, 0x41, \
+			0x9A, 0xE7, 0x6B, 0x17, 0x49, 0x77, 0xC1, 0x92 \
+		}
+
 /*
  * Common header for Hyper-V ICs
  */
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 1861f8e2ce2b..9beb7c991638 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -25,6 +25,8 @@
 #ifndef _UAPI_HYPERV_H
 #define _UAPI_HYPERV_H
 
+#include <linux/uuid.h>
+
 /*
  * Framework version for util services.
  */
@@ -93,6 +95,50 @@ struct hv_vss_msg {
 	};
 } __attribute__((packed));
 
+/*
+ * Implementation of a host to guest copy facility.
+ */
+
+#define FCOPY_VERSION_0 0
+#define FCOPY_CURRENT_VERSION FCOPY_VERSION_0
+#define W_MAX_PATH 260
+
+enum hv_fcopy_op {
+	START_FILE_COPY = 0,
+	WRITE_TO_FILE,
+	COMPLETE_FCOPY,
+	CANCEL_FCOPY,
+};
+
+struct hv_fcopy_hdr {
+	__u32 operation;
+	uuid_le service_id0; /* currently unused */
+	uuid_le service_id1; /* currently unused */
+} __attribute__((packed));
+
+#define OVER_WRITE	0x1
+#define CREATE_PATH	0x2
+
+struct hv_start_fcopy {
+	struct hv_fcopy_hdr hdr;
+	__u16 file_name[W_MAX_PATH];
+	__u16 path_name[W_MAX_PATH];
+	__u32 copy_flags;
+	__u64 file_size;
+} __attribute__((packed));
+
+/*
+ * The file is chunked into fragments.
+ */
+#define DATA_FRAGMENT	(6 * 1024)
+
+struct hv_do_fcopy {
+	struct hv_fcopy_hdr hdr;
+	__u64	offset;
+	__u32	size;
+	__u8	data[DATA_FRAGMENT];
+};
+
 /*
  * An implementation of HyperV key value pair (KVP) functionality for Linux.
  *
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
new file mode 100644
index 000000000000..4ecc4fd0bc1b
--- /dev/null
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -0,0 +1,195 @@
+/*
+ * An implementation of host to guest copy functionality for Linux.
+ *
+ * Copyright (C) 2014, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ */
+
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/hyperv.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+static int target_fd;
+static char target_fname[W_MAX_PATH];
+
+static int hv_start_fcopy(struct hv_start_fcopy *smsg)
+{
+	int error = HV_E_FAIL;
+	char *q, *p;
+
+	/*
+	 * If possible, append a path separator to the path.
+	 */
+	if (strlen((char *)smsg->path_name) < (W_MAX_PATH - 2))
+		strcat((char *)smsg->path_name, "/");
+
+	p = (char *)smsg->path_name;
+	snprintf(target_fname, sizeof(target_fname), "%s/%s",
+		(char *)smsg->path_name, (char *)smsg->file_name);
+
+	syslog(LOG_INFO, "Target file name: %s", target_fname);
+	/*
+	 * Walk the path one component at a time; a missing directory is
+	 * created only if CREATE_PATH is set, otherwise the copy fails.
+	 */
+	while ((q = strchr(p, '/')) != NULL) {
+		if (q == p) {
+			p++;
+			continue;
+		}
+		*q = '\0';
+		if (access((char *)smsg->path_name, F_OK)) {
+			if (smsg->copy_flags & CREATE_PATH) {
+				if (mkdir((char *)smsg->path_name, 0755)) {
+					syslog(LOG_ERR, "Failed to create %s",
+						(char *)smsg->path_name);
+					goto done;
+				}
+			} else {
+				syslog(LOG_ERR, "Invalid path: %s",
+					(char *)smsg->path_name);
+				goto done;
+			}
+		}
+		p = q + 1;
+		*q = '/';
+	}
+	/* An existing target may only be replaced if OVER_WRITE is set. */
+	if (!access(target_fname, F_OK)) {
+		syslog(LOG_INFO, "File: %s exists", target_fname);
+		if (!(smsg->copy_flags & OVER_WRITE))
+			goto done;
+	}
+
+	target_fd = open(target_fname, O_RDWR | O_CREAT | O_CLOEXEC, 0744);
+	if (target_fd == -1) {
+		syslog(LOG_INFO, "Open Failed: %s", strerror(errno));
+		goto done;
+	}
+
+	error = 0;
+done:
+	return error;
+}
+
+static int hv_copy_data(struct hv_do_fcopy *cpmsg)
+{
+	ssize_t bytes_written;
+
+	bytes_written = pwrite(target_fd, cpmsg->data, cpmsg->size,
+				cpmsg->offset);
+
+	if (bytes_written != cpmsg->size)
+		return HV_E_FAIL;
+
+	return 0;
+}
+
+static int hv_copy_finished(void)
+{
+	close(target_fd);
+	return 0;
+}
+static int hv_copy_cancel(void)
+{
+	close(target_fd);
+	unlink(target_fname);
+	return 0;
+
+}
+
+int main(void)
+{
+	int fcopy_fd, len, error;
+	int version = FCOPY_CURRENT_VERSION;
+	/* Raw message buffer; aligned so the struct casts below are safe. */
+	char buffer[4096 * 2] __attribute__((aligned(8)));
+	struct hv_fcopy_hdr *in_msg;
+
+	if (daemon(1, 0)) {
+		syslog(LOG_ERR, "daemon() failed; error: %s", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	openlog("HV_FCOPY", 0, LOG_USER);
+	syslog(LOG_INFO, "HV_FCOPY starting; pid is:%d", getpid());
+
+	fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
+
+	if (fcopy_fd < 0) {
+		syslog(LOG_ERR, "open /dev/vmbus/hv_fcopy failed; error: %d %s",
+			errno, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Register with the kernel.
+	 */
+	if ((write(fcopy_fd, &version, sizeof(int))) != sizeof(int)) {
+		syslog(LOG_ERR, "Registration failed: %s", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	while (1) {
+		/*
+		 * In this loop we process fcopy messages after the
+		 * handshake is complete.
+		 */
+		len = pread(fcopy_fd, buffer, sizeof(buffer), 0);
+		if (len < 0) {
+			syslog(LOG_ERR, "pread failed: %s", strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		in_msg = (struct hv_fcopy_hdr *)buffer;
+
+		switch (in_msg->operation) {
+		case START_FILE_COPY:
+			error = hv_start_fcopy((struct hv_start_fcopy *)in_msg);
+			break;
+		case WRITE_TO_FILE:
+			error = hv_copy_data((struct hv_do_fcopy *)in_msg);
+			break;
+		case COMPLETE_FCOPY:
+			error = hv_copy_finished();
+			break;
+		case CANCEL_FCOPY:
+			error = hv_copy_cancel();
+			break;
+
+		default:
+			syslog(LOG_ERR, "Unknown operation: %d",
+				in_msg->operation);
+			error = HV_E_FAIL;
+		}
+
+		if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
+			syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+}
-- 
cgit v1.2.3


From d2bf2f34cc1a8304a5dab0d42e7a2ae58ede94cd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 18 Feb 2014 15:25:32 +0100
Subject: netfilter: nft_ct: labels get support

This also adds NF_CT_LABELS_MAX_SIZE so it can be re-used
as BUILD_BUG_ON in nft_ct.

At this time, nft doesn't yet support writing to the label area;
when this changes the label->words handling needs to be moved
out of xt_connlabel.c into nf_conntrack_labels.c.

Also removes a useless run-time check: words cannot grow beyond
4 (32 bit) or 2 (64bit) since xt_connlabel enforces a maximum of
128 labels.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_labels.h |  4 +++-
 include/uapi/linux/netfilter/nf_tables.h    |  1 +
 net/netfilter/nf_conntrack_netlink.c        |  5 ++---
 net/netfilter/nft_ct.c                      | 24 ++++++++++++++++++++++++
 4 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index c985695283b3..dec6336bf850 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -7,6 +7,8 @@
 
 #include <uapi/linux/netfilter/xt_connlabel.h>
 
+#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
+
 struct nf_conn_labels {
 	u8 words;
 	unsigned long bits[];
@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
 	u8 words;
 
 	words = ACCESS_ONCE(net->ct.label_words);
-	if (words == 0 || WARN_ON_ONCE(words > 8))
+	if (words == 0)
 		return NULL;
 
 	cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 83c985a6170b..c84c452c62a7 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -601,6 +601,7 @@ enum nft_ct_keys {
 	NFT_CT_PROTOCOL,
 	NFT_CT_PROTO_SRC,
 	NFT_CT_PROTO_DST,
+	NFT_CT_LABELS,
 };
 
 /**
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bb322d0beb48..47e9369997ef 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -966,7 +966,6 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name,
 	return 0;
 }
 
-#define __CTA_LABELS_MAX_LENGTH ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
 static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_TUPLE_ORIG]	= { .type = NLA_NESTED },
 	[CTA_TUPLE_REPLY]	= { .type = NLA_NESTED },
@@ -984,9 +983,9 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_ZONE]		= { .type = NLA_U16 },
 	[CTA_MARK_MASK]		= { .type = NLA_U32 },
 	[CTA_LABELS]		= { .type = NLA_BINARY,
-				    .len = __CTA_LABELS_MAX_LENGTH },
+				    .len = NF_CT_LABELS_MAX_SIZE },
 	[CTA_LABELS_MASK]	= { .type = NLA_BINARY,
-				    .len = __CTA_LABELS_MAX_LENGTH },
+				    .len = NF_CT_LABELS_MAX_SIZE },
 };
 
 static int
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 46e275403838..e59b08f9ccbd 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -19,6 +19,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_labels.h>
 
 struct nft_ct {
 	enum nft_ct_keys	key:8;
@@ -97,6 +98,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 			goto err;
 		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
 		return;
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	case NFT_CT_LABELS: {
+		struct nf_conn_labels *labels = nf_ct_labels_find(ct);
+		unsigned int size;
+
+		if (!labels) {
+			memset(dest->data, 0, sizeof(dest->data));
+			return;
+		}
+
+		BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
+		size = labels->words * sizeof(long);
+
+		memcpy(dest->data, labels->bits, size);
+		if (size < sizeof(dest->data))
+			memset(((char *) dest->data) + size, 0,
+			       sizeof(dest->data) - size);
+		return;
+	}
+#endif
 	}
 
 	tuple = &ct->tuplehash[priv->dir].tuple;
@@ -220,6 +241,9 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
 #endif
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
 	case NFT_CT_SECMARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	case NFT_CT_LABELS:
 #endif
 	case NFT_CT_EXPIRATION:
 	case NFT_CT_HELPER:
-- 
cgit v1.2.3


From df942e7ba70cd0a7aa9e0432b8a6a328de2c5574 Mon Sep 17 00:00:00 2001
From: Sunil Dutt Undekari <usdutt@qti.qualcomm.com>
Date: Thu, 20 Feb 2014 16:22:09 +0530
Subject: cfg80211: Pass TDLS peer capability information in tdls_mgmt

While framing the TDLS Setup Confirmation frame, the driver needs to
know if the TDLS peer is VHT/HT/WMM capable and thus shall construct
the VHT/HT operation / WMM parameter elements accordingly. Supplicant
determines if the TDLS peer is VHT/HT/WMM capable based on the
presence of the respective IEs in the received TDLS Setup Response frame.

The host driver should not need to parse the received TDLS Response
frame and thus, should be able to rely on the supplicant to indicate
the capability of the peer through additional flags while transmitting
the TDLS Setup Confirmation frame through tdls_mgmt operations.

Signed-off-by: Sunil Dutt Undekari <usdutt@qti.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mwifiex/cfg80211.c |  4 ++--
 include/net/cfg80211.h                  |  3 ++-
 include/uapi/linux/nl80211.h            | 21 +++++++++++++++++++++
 net/mac80211/cfg.c                      |  4 ++--
 net/wireless/nl80211.c                  |  7 ++++++-
 net/wireless/rdev-ops.h                 |  9 ++++++---
 net/wireless/trace.h                    | 12 ++++++++----
 7 files changed, 47 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 436ba437a4ba..6948a97af839 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -2600,8 +2600,8 @@ static int mwifiex_cfg80211_set_coalesce(struct wiphy *wiphy,
 static int
 mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 			   u8 *peer, u8 action_code, u8 dialog_token,
-			   u16 status_code, const u8 *extra_ies,
-			   size_t extra_ies_len)
+			   u16 status_code, u32 peer_capability,
+			   const u8 *extra_ies, size_t extra_ies_len)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 	int ret;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9f90554e88c4..c89a5b5bd103 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2465,7 +2465,8 @@ struct cfg80211_ops {
 
 	int	(*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 			     u8 *peer, u8 action_code,  u8 dialog_token,
-			     u16 status_code, const u8 *buf, size_t len);
+			     u16 status_code, u32 peer_capability,
+			     const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
 			     u8 *peer, enum nl80211_tdls_operation oper);
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ba1f7625625c..47d7087513e0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1575,6 +1575,9 @@ enum nl80211_commands {
  *	advertise values that cannot always be met. In such cases, an attempt
  *	to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
  *
+ * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
+ *	As specified in the &enum nl80211_tdls_peer_capability.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1908,6 +1911,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MAX_AP_ASSOC_STA,
 
+	NL80211_ATTR_TDLS_PEER_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -4074,4 +4079,20 @@ struct nl80211_vendor_cmd_info {
 	__u32 subcmd;
 };
 
+/**
+ * enum nl80211_tdls_peer_capability - TDLS peer flags.
+ *
+ * Used by tdls_mgmt() to determine which conditional elements need
+ * to be added to TDLS Setup frames.
+ *
+ * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable.
+ * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable.
+ * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable.
+ */
+enum nl80211_tdls_peer_capability {
+	NL80211_TDLS_PEER_HT = 1<<0,
+	NL80211_TDLS_PEER_VHT = 1<<1,
+	NL80211_TDLS_PEER_WMM = 1<<2,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 3849fd07a321..1acb29109b45 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3644,8 +3644,8 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
 
 static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 			       u8 *peer, u8 action_code, u8 dialog_token,
-			       u16 status_code, const u8 *extra_ies,
-			       size_t extra_ies_len)
+			       u16 status_code, u32 peer_capability,
+			       const u8 *extra_ies, size_t extra_ies_len)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 058aa0e1a462..be836098d342 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -384,6 +384,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 				   .len = IEEE80211_QOS_MAP_LEN_MAX },
 	[NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
+	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -7269,6 +7270,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	u8 action_code, dialog_token;
+	u32 peer_capability = 0;
 	u16 status_code;
 	u8 *peer;
 
@@ -7287,9 +7289,12 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
 	action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]);
 	status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
 	dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]);
+	if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY])
+		peer_capability =
+			nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]);
 
 	return rdev_tdls_mgmt(rdev, dev, peer, action_code,
-			      dialog_token, status_code,
+			      dialog_token, status_code, peer_capability,
 			      nla_data(info->attrs[NL80211_ATTR_IE]),
 			      nla_len(info->attrs[NL80211_ATTR_IE]));
 }
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index c8e225947adb..74d97d33c938 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -769,13 +769,16 @@ static inline int rdev_set_rekey_data(struct cfg80211_registered_device *rdev,
 static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev,
 				 struct net_device *dev, u8 *peer,
 				 u8 action_code, u8 dialog_token,
-				 u16 status_code, const u8 *buf, size_t len)
+				 u16 status_code, u32 peer_capability,
+				 const u8 *buf, size_t len)
 {
 	int ret;
 	trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
-			     dialog_token, status_code, buf, len);
+			     dialog_token, status_code, peer_capability,
+			     buf, len);
 	ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
-				   dialog_token, status_code, buf, len);
+				   dialog_token, status_code, peer_capability,
+				   buf, len);
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5eaeed59db07..aabccf13e07b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1468,9 +1468,10 @@ TRACE_EVENT(rdev_sched_scan_start,
 TRACE_EVENT(rdev_tdls_mgmt,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 u8 *peer, u8 action_code, u8 dialog_token,
-		 u16 status_code, const u8 *buf, size_t len),
+		 u16 status_code, u32 peer_capability,
+		 const u8 *buf, size_t len),
 	TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code,
-		buf, len),
+		peer_capability, buf, len),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -1478,6 +1479,7 @@ TRACE_EVENT(rdev_tdls_mgmt,
 		__field(u8, action_code)
 		__field(u8, dialog_token)
 		__field(u16, status_code)
+		__field(u32, peer_capability)
 		__dynamic_array(u8, buf, len)
 	),
 	TP_fast_assign(
@@ -1487,13 +1489,15 @@ TRACE_EVENT(rdev_tdls_mgmt,
 		__entry->action_code = action_code;
 		__entry->dialog_token = dialog_token;
 		__entry->status_code = status_code;
+		__entry->peer_capability = peer_capability;
 		memcpy(__get_dynamic_array(buf), buf, len);
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, "
-		  "dialog_token: %u, status_code: %u, buf: %#.2x ",
+		  "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
 		  __entry->action_code, __entry->dialog_token,
-		  __entry->status_code, ((u8 *)__get_dynamic_array(buf))[0])
+		  __entry->status_code, __entry->peer_capability,
+		  ((u8 *)__get_dynamic_array(buf))[0])
 );
 
 TRACE_EVENT(rdev_dump_survey,
-- 
cgit v1.2.3


From d9b8396a52b4e857263eeb9e1eba474ea11c19bf Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Feb 2014 17:16:10 +0100
Subject: cfg80211: document sched_scan_stop synchronous behaviour

Due to userspace assumptions, the sched_scan_stop operation must
be synchronous, i.e. once it returns a new scheduled scan must be
able to start immediately. Document this in the API.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 7 ++++++-
 include/uapi/linux/nl80211.h | 5 +++--
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c89a5b5bd103..7c9fe4b05927 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2206,7 +2206,12 @@ struct cfg80211_qos_map {
  * @set_cqm_txe_config: Configure connection quality monitor TX error
  *	thresholds.
  * @sched_scan_start: Tell the driver to start a scheduled scan.
- * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan.
+ * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan. This
+ *	call must stop the scheduled scan and be ready for starting a new one
+ *	before it returns, i.e. @sched_scan_start may be called immediately
+ *	after that again and should not fail in that case. The driver should
+ *	not call cfg80211_sched_scan_stopped() for a requested stop (when this
+ *	method returns 0.)
  *
  * @mgmt_frame_register: Notify driver that a management frame type was
  *	registered. Note that this callback may not sleep, and cannot run
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 47d7087513e0..81481cff1dc1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -303,8 +303,9 @@
  *	passed, all channels allowed for the current regulatory domain
  *	are used.  Extra IEs can also be passed from the userspace by
  *	using the %NL80211_ATTR_IE attribute.
- * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan.  Returns -ENOENT
- *	if scheduled scan is not running.
+ * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if
+ *	scheduled scan is not running. The caller may assume that as soon
+ *	as the call returns, it is safe to start a new scheduled scan again.
  * @NL80211_CMD_SCHED_SCAN_RESULTS: indicates that there are scheduled scan
  *	results available.
  * @NL80211_CMD_SCHED_SCAN_STOPPED: indicates that the scheduled scan has
-- 
cgit v1.2.3


From b0dfd2ea12d92b49639ad84f24ddd00c7ac144b5 Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Date: Thu, 20 Feb 2014 13:52:16 +0100
Subject: cfg80211: regulatory: introduce NL80211_RRF_AUTO_BW rule flag

Introduce the NL80211_RRF_AUTO_BW rule flag. If this flag is set, the
maximum available bandwidth should be calculated based on contiguous
rules, and wider channels will be allowed to cross multiple
contiguous/overlapping frequency ranges.

On old kernels, the maximum bandwidth from the regulatory rule will
be used, as there is no NL80211_RRF_AUTO_BW flag there.

This fixes the previous commit 9752482083066af7ac18a5ca376f
("cfg80211: regulatory introduce maximum bandwidth calculation")
which was found to be a problem for userspace API compatibility.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
[edit commit log, use sizeof()]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  9 ++---
 net/wireless/genregdb.awk    |  2 ++
 net/wireless/nl80211.c       |  7 ++--
 net/wireless/reg.c           | 83 +++++++++++++++++++++++++-------------------
 4 files changed, 58 insertions(+), 43 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 81481cff1dc1..ff72cab3cd3a 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2443,10 +2443,7 @@ enum nl80211_reg_type {
  * 	in KHz. This is not a center a frequency but an actual regulatory
  * 	band edge.
  * @NL80211_ATTR_FREQ_RANGE_MAX_BW: maximum allowed bandwidth for this
- *	frequency range, in KHz. If not present or 0, maximum available
- *	bandwidth should be calculated base on contiguous rules and wider
- *	channels will be allowed to cross multiple contiguous/overlapping
- *	frequency ranges.
+ *	frequency range, in KHz.
  * @NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN: the maximum allowed antenna gain
  * 	for a given frequency range. The value is in mBi (100 * dBi).
  * 	If you don't have one then don't send this.
@@ -2517,6 +2514,9 @@ enum nl80211_sched_scan_match_attr {
  * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed,
  * 	this includes probe requests or modes of operation that require
  * 	beaconing.
+ * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated
+ *	base on contiguous rules and wider channels will be allowed to cross
+ *	multiple contiguous/overlapping frequency ranges.
  */
 enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_OFDM		= 1<<0,
@@ -2528,6 +2528,7 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_PTMP_ONLY		= 1<<6,
 	NL80211_RRF_NO_IR		= 1<<7,
 	__NL80211_RRF_NO_IBSS		= 1<<8,
+	NL80211_RRF_AUTO_BW		= 1<<11,
 };
 
 #define NL80211_RRF_PASSIVE_SCAN	NL80211_RRF_NO_IR
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index 9a8217d2a908..fdfd3f063a9b 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -105,6 +105,8 @@ function parse_reg_rule()
 			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
 		} else if (flagarray[arg] == "NO-IR") {
 			flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
+		} else if (flagarray[arg] == "AUTO-BW") {
+			flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
 		}
 
 	}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index be836098d342..1e5a434e4224 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4628,6 +4628,8 @@ static int parse_reg_rule(struct nlattr *tb[],
 		return -EINVAL;
 	if (!tb[NL80211_ATTR_FREQ_RANGE_END])
 		return -EINVAL;
+	if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
+		return -EINVAL;
 	if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
 		return -EINVAL;
 
@@ -4637,9 +4639,8 @@ static int parse_reg_rule(struct nlattr *tb[],
 		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
 	freq_range->end_freq_khz =
 		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
-	if (tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
-		freq_range->max_bandwidth_khz =
-			nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
+	freq_range->max_bandwidth_khz =
+		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
 
 	power_rule->max_eirp =
 		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 27c5253e7a61..6b6f33ad78f2 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -563,9 +563,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
 		if (freq_range_tmp->end_freq_khz < freq_range->start_freq_khz)
 			break;
 
-		if (freq_range_tmp->max_bandwidth_khz)
-			break;
-
 		freq_range = freq_range_tmp;
 	}
 
@@ -582,9 +579,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
 		if (freq_range_tmp->start_freq_khz > freq_range->end_freq_khz)
 			break;
 
-		if (freq_range_tmp->max_bandwidth_khz)
-			break;
-
 		freq_range = freq_range_tmp;
 	}
 
@@ -729,21 +723,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
 	max_bandwidth1 = freq_range1->max_bandwidth_khz;
 	max_bandwidth2 = freq_range2->max_bandwidth_khz;
 
-	/*
-	 * In case max_bandwidth1 == 0 and max_bandwith2 == 0 set
-	 * output bandwidth as 0 (auto calculation). Next we will
-	 * calculate this correctly in handle_channel function.
-	 * In other case calculate output bandwidth here.
-	 */
-	if (max_bandwidth1 || max_bandwidth2) {
-		if (!max_bandwidth1)
-			max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1);
-		if (!max_bandwidth2)
-			max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2);
-	}
+	if (rule1->flags & NL80211_RRF_AUTO_BW)
+		max_bandwidth1 = reg_get_max_bandwidth(rd1, rule1);
+	if (rule2->flags & NL80211_RRF_AUTO_BW)
+		max_bandwidth2 = reg_get_max_bandwidth(rd2, rule2);
 
 	freq_range->max_bandwidth_khz = min(max_bandwidth1, max_bandwidth2);
 
+	intersected_rule->flags = rule1->flags | rule2->flags;
+
+	/*
+	 * In case NL80211_RRF_AUTO_BW requested for both rules
+	 * set AUTO_BW in intersected rule also. Next we will
+	 * calculate BW correctly in handle_channel function.
+	 * In other case remove AUTO_BW flag while we calculate
+	 * maximum bandwidth correctly and auto calculation is
+	 * not required.
+	 */
+	if ((rule1->flags & NL80211_RRF_AUTO_BW) &&
+	    (rule2->flags & NL80211_RRF_AUTO_BW))
+		intersected_rule->flags |= NL80211_RRF_AUTO_BW;
+	else
+		intersected_rule->flags &= ~NL80211_RRF_AUTO_BW;
+
 	freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz;
 	if (freq_range->max_bandwidth_khz > freq_diff)
 		freq_range->max_bandwidth_khz = freq_diff;
@@ -753,8 +755,6 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
 	power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain,
 		power_rule2->max_antenna_gain);
 
-	intersected_rule->flags = rule1->flags | rule2->flags;
-
 	if (!is_valid_reg_rule(intersected_rule))
 		return -EINVAL;
 
@@ -938,31 +938,42 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
 EXPORT_SYMBOL(reg_initiator_name);
 
 #ifdef CONFIG_CFG80211_REG_DEBUG
-static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
+				    struct ieee80211_channel *chan,
 				    const struct ieee80211_reg_rule *reg_rule)
 {
 	const struct ieee80211_power_rule *power_rule;
 	const struct ieee80211_freq_range *freq_range;
-	char max_antenna_gain[32];
+	char max_antenna_gain[32], bw[32];
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
 	if (!power_rule->max_antenna_gain)
-		snprintf(max_antenna_gain, 32, "N/A");
+		snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
 	else
-		snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
+		snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d",
+			 power_rule->max_antenna_gain);
+
+	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+		snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+			 freq_range->max_bandwidth_khz,
+			 reg_get_max_bandwidth(regd, reg_rule));
+	else
+		snprintf(bw, sizeof(bw), "%d KHz",
+			 freq_range->max_bandwidth_khz);
 
 	REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
 		      chan->center_freq);
 
-	REG_DBG_PRINT("%d KHz - %d KHz @ %d KHz), (%s mBi, %d mBm)\n",
+	REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n",
 		      freq_range->start_freq_khz, freq_range->end_freq_khz,
-		      freq_range->max_bandwidth_khz, max_antenna_gain,
+		      bw, max_antenna_gain,
 		      power_rule->max_eirp);
 }
 #else
-static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
+				    struct ieee80211_channel *chan,
 				    const struct ieee80211_reg_rule *reg_rule)
 {
 	return;
@@ -1022,17 +1033,16 @@ static void handle_channel(struct wiphy *wiphy,
 		return;
 	}
 
-	chan_reg_rule_print_dbg(chan, reg_rule);
+	regd = reg_get_regdomain(wiphy);
+	chan_reg_rule_print_dbg(regd, chan, reg_rule);
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
 	max_bandwidth_khz = freq_range->max_bandwidth_khz;
 	/* Check if auto calculation requested */
-	if (!max_bandwidth_khz) {
-		regd = reg_get_regdomain(wiphy);
+	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
 		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
-	}
 
 	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
 		bw_flags = IEEE80211_CHAN_NO_HT40;
@@ -1437,14 +1447,14 @@ static void handle_channel_custom(struct wiphy *wiphy,
 		return;
 	}
 
-	chan_reg_rule_print_dbg(chan, reg_rule);
+	chan_reg_rule_print_dbg(regd, chan, reg_rule);
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
 	max_bandwidth_khz = freq_range->max_bandwidth_khz;
 	/* Check if auto calculation requested */
-	if (!max_bandwidth_khz)
+	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
 		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
 
 	if (max_bandwidth_khz < MHZ_TO_KHZ(40))
@@ -2254,11 +2264,12 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 		freq_range = &reg_rule->freq_range;
 		power_rule = &reg_rule->power_rule;
 
-		if (!freq_range->max_bandwidth_khz)
-			snprintf(bw, 32, "%d KHz, AUTO",
+		if (reg_rule->flags & NL80211_RRF_AUTO_BW)
+			snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+				 freq_range->max_bandwidth_khz,
 				 reg_get_max_bandwidth(rd, reg_rule));
 		else
-			snprintf(bw, 32, "%d KHz",
+			snprintf(bw, sizeof(bw), "%d KHz",
 				 freq_range->max_bandwidth_khz);
 
 		/*
-- 
cgit v1.2.3


From 00f5e61998dd17f5375d9dfc01331f104b83f841 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Mon, 24 Feb 2014 10:58:15 +1100
Subject: fs: Add new flag(FALLOC_FL_COLLAPSE_RANGE) for fallocate

This patch is in response to the following post:
http://lwn.net/Articles/556136/
"ext4: introduce two new ioctls"

Dave Chinner suggested that truncate_block_range (which was the name of
one of the ioctls) should be a fallocate operation rather than an
fs-specific ioctl, hence we add this functionality as a new fallocate flag.

This new functionality of collapsing a range could be used by media editing
tools that do non-linear editing to quickly purge and edit parts of a media
file. This will immensely improve the performance of these operations.
The limitation of fs block size aligned offsets can be easily handled
by media codecs which are encapsulated in a container, as they just have to
change the offset to the next keyframe value to match the proper alignment.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/open.c                   | 24 +++++++++++++++++++++---
 include/uapi/linux/falloc.h | 21 +++++++++++++++++++++
 2 files changed, 42 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/open.c b/fs/open.c
index 4b3e1edf2fe4..4a923a547d10 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,8 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+		     FALLOC_FL_COLLAPSE_RANGE))
 		return -EOPNOTSUPP;
 
 	/* Punch hole must have keep size set */
@@ -239,11 +240,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    !(mode & FALLOC_FL_KEEP_SIZE))
 		return -EOPNOTSUPP;
 
+	/* Collapse range should only be used exclusively. */
+	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
+		return -EINVAL;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
 
-	/* It's not possible punch hole on append only file */
-	if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+	/*
+	 * It's not possible to punch hole or perform collapse range
+	 * on append only file
+	 */
+	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+	    && IS_APPEND(inode))
 		return -EPERM;
 
 	if (IS_IMMUTABLE(inode))
@@ -271,6 +281,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
 		return -EFBIG;
 
+	/*
+	 * There is no need to overlap collapse range with EOF, in which case
+	 * it is effectively a truncate operation
+	 */
+	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+	    (offset + len >= i_size_read(inode)))
+		return -EINVAL;
+
 	if (!file->f_op->fallocate)
 		return -EOPNOTSUPP;
 
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4ccf8b61..5ff562ddac0b 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -5,5 +5,26 @@
 #define FALLOC_FL_PUNCH_HOLE	0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE	0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed is appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such collapsing a range of a file changes
+ * the size of the file, reducing it by the same length of the range
+ * that has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE	0x08
 
 #endif /* _UAPI_FALLOC_H_ */
-- 
cgit v1.2.3


From 089027e57cfa79337feffdd7252c8ba0be352afa Mon Sep 17 00:00:00 2001
From: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Date: Fri, 21 Feb 2014 19:46:12 +0100
Subject: cfg80211: regulatory: allow getting DFS CAC time from userspace

Introduce DFS CAC time as a regd param, configured per REG_RULE and
set per channel in cfg80211. DFS CAC time is closely connected with
regulatory database configuration. Instead of using hardcoded values,
get DFS CAC time from the regulatory database. Pass DFS CAC time to user
mode (mainly for iw reg get, iw list, iw info). Allow setting DFS CAC
time via CRDA. Add support for internal regulatory database.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
[rewrap commit log]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/net/regulatory.h     | 21 +++++++++++++--------
 include/uapi/linux/nl80211.h |  6 ++++++
 net/wireless/genregdb.awk    |  8 +++++++-
 net/wireless/nl80211.c       | 13 ++++++++++++-
 net/wireless/reg.c           | 32 ++++++++++++++++++++++++++------
 6 files changed, 66 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8c9ba44fb7cf..bfa9a0c7b2d7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -151,6 +151,7 @@ enum ieee80211_channel_flags {
  * @dfs_state: current state of this channel. Only relevant if radar is required
  *	on this channel.
  * @dfs_state_entered: timestamp (jiffies) when the dfs state was entered.
+ * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels.
  */
 struct ieee80211_channel {
 	enum ieee80211_band band;
@@ -165,6 +166,7 @@ struct ieee80211_channel {
 	int orig_mag, orig_mpwr;
 	enum nl80211_dfs_state dfs_state;
 	unsigned long dfs_state_entered;
+	unsigned int dfs_cac_ms;
 };
 
 /**
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index b07cdc9fa454..75fc1f5a948d 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -155,6 +155,7 @@ struct ieee80211_reg_rule {
 	struct ieee80211_freq_range freq_range;
 	struct ieee80211_power_rule power_rule;
 	u32 flags;
+	u32 dfs_cac_ms;
 };
 
 struct ieee80211_regdomain {
@@ -172,14 +173,18 @@ struct ieee80211_regdomain {
 #define DBM_TO_MBM(gain) ((gain) * 100)
 #define MBM_TO_DBM(gain) ((gain) / 100)
 
-#define REG_RULE(start, end, bw, gain, eirp, reg_flags) \
-{							\
-	.freq_range.start_freq_khz = MHZ_TO_KHZ(start),	\
-	.freq_range.end_freq_khz = MHZ_TO_KHZ(end),	\
-	.freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw),	\
-	.power_rule.max_antenna_gain = DBI_TO_MBI(gain),\
-	.power_rule.max_eirp = DBM_TO_MBM(eirp),	\
-	.flags = reg_flags,				\
+#define REG_RULE_EXT(start, end, bw, gain, eirp, dfs_cac, reg_flags)	\
+{									\
+	.freq_range.start_freq_khz = MHZ_TO_KHZ(start),			\
+	.freq_range.end_freq_khz = MHZ_TO_KHZ(end),			\
+	.freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw),			\
+	.power_rule.max_antenna_gain = DBI_TO_MBI(gain),		\
+	.power_rule.max_eirp = DBM_TO_MBM(eirp),			\
+	.flags = reg_flags,						\
+	.dfs_cac_ms = dfs_cac,						\
 }
 
+#define REG_RULE(start, end, bw, gain, eirp, reg_flags) \
+	REG_RULE_EXT(start, end, bw, gain, eirp, 0, reg_flags)
+
 #endif
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ff72cab3cd3a..1ba9d626aa83 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2335,6 +2335,7 @@ enum nl80211_band_attr {
  * @NL80211_FREQUENCY_ATTR_NO_160MHZ: any 160 MHz (but not 80+80) channel
  *	using this channel as the primary or any of the secondary channels
  *	isn't possible
+ * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -2353,6 +2354,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_NO_HT40_PLUS,
 	NL80211_FREQUENCY_ATTR_NO_80MHZ,
 	NL80211_FREQUENCY_ATTR_NO_160MHZ,
+	NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -2449,6 +2451,8 @@ enum nl80211_reg_type {
  * 	If you don't have one then don't send this.
  * @NL80211_ATTR_POWER_RULE_MAX_EIRP: the maximum allowed EIRP for
  * 	a given frequency range. The value is in mBm (100 * dBm).
+ * @NL80211_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ *	If not present or 0 default CAC time will be used.
  * @NL80211_REG_RULE_ATTR_MAX: highest regulatory rule attribute number
  *	currently defined
  * @__NL80211_REG_RULE_ATTR_AFTER_LAST: internal use
@@ -2464,6 +2468,8 @@ enum nl80211_reg_rule_attr {
 	NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
 	NL80211_ATTR_POWER_RULE_MAX_EIRP,
 
+	NL80211_ATTR_DFS_CAC_TIME,
+
 	/* keep last */
 	__NL80211_REG_RULE_ATTR_AFTER_LAST,
 	NL80211_REG_RULE_ATTR_MAX = __NL80211_REG_RULE_ATTR_AFTER_LAST - 1
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index fdfd3f063a9b..b35da8dc85de 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -66,6 +66,7 @@ function parse_reg_rule()
 	units = $8
 	sub(/\)/, "", units)
 	sub(/,/, "", units)
+	dfs_cac = $9
 	if (units == "mW") {
 		if (power == 100) {
 			power = 20
@@ -78,7 +79,12 @@ function parse_reg_rule()
 		} else {
 			print "Unknown power value in database!"
 		}
+	} else {
+		dfs_cac = $8
 	}
+	sub(/,/, "", dfs_cac)
+	sub(/\(/, "", dfs_cac)
+	sub(/\)/, "", dfs_cac)
 	flagstr = ""
 	for (i=8; i<=NF; i++)
 		flagstr = flagstr $i
@@ -111,7 +117,7 @@ function parse_reg_rule()
 
 	}
 	flags = flags "0"
-	printf "\t\tREG_RULE(%d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, flags
+	printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %d, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
 	rules++
 }
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2c38b28a85b9..9f7ebf94a050 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -593,6 +593,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
 					time))
 				goto nla_put_failure;
+			if (nla_put_u32(msg,
+					NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+					chan->dfs_cac_ms))
+				goto nla_put_failure;
 		}
 	}
 
@@ -4614,6 +4618,7 @@ static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] =
 	[NL80211_ATTR_FREQ_RANGE_MAX_BW]	= { .type = NLA_U32 },
 	[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]	= { .type = NLA_U32 },
 	[NL80211_ATTR_POWER_RULE_MAX_EIRP]	= { .type = NLA_U32 },
+	[NL80211_ATTR_DFS_CAC_TIME]		= { .type = NLA_U32 },
 };
 
 static int parse_reg_rule(struct nlattr *tb[],
@@ -4649,6 +4654,10 @@ static int parse_reg_rule(struct nlattr *tb[],
 		power_rule->max_antenna_gain =
 			nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
 
+	if (tb[NL80211_ATTR_DFS_CAC_TIME])
+		reg_rule->dfs_cac_ms =
+			nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
+
 	return 0;
 }
 
@@ -5136,7 +5145,9 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
 				power_rule->max_antenna_gain) ||
 		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
-				power_rule->max_eirp))
+				power_rule->max_eirp) ||
+		    nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME,
+				reg_rule->dfs_cac_ms))
 			goto nla_put_failure_rcu;
 
 		nla_nest_end(msg, nl_reg_rule);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 651404c22de9..b95e9cf139c0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -756,6 +756,9 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
 	power_rule->max_antenna_gain = min(power_rule1->max_antenna_gain,
 		power_rule2->max_antenna_gain);
 
+	intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
+					   rule2->dfs_cac_ms);
+
 	if (!is_valid_reg_rule(intersected_rule))
 		return -EINVAL;
 
@@ -1078,6 +1081,14 @@ static void handle_channel(struct wiphy *wiphy,
 		min_t(int, chan->orig_mag,
 		      MBI_TO_DBI(power_rule->max_antenna_gain));
 	chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp);
+
+	if (chan->flags & IEEE80211_CHAN_RADAR) {
+		if (reg_rule->dfs_cac_ms)
+			chan->dfs_cac_ms = reg_rule->dfs_cac_ms;
+		else
+			chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
+	}
+
 	if (chan->orig_mpwr) {
 		/*
 		 * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER
@@ -2256,9 +2267,9 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_freq_range *freq_range = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
-	char bw[32];
+	char bw[32], cac_time[32];
 
-	pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n");
+	pr_info("  (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
 
 	for (i = 0; i < rd->n_reg_rules; i++) {
 		reg_rule = &rd->reg_rules[i];
@@ -2273,23 +2284,32 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 			snprintf(bw, sizeof(bw), "%d KHz",
 				 freq_range->max_bandwidth_khz);
 
+		if (reg_rule->flags & NL80211_RRF_DFS)
+			scnprintf(cac_time, sizeof(cac_time), "%u s",
+				  reg_rule->dfs_cac_ms/1000);
+		else
+			scnprintf(cac_time, sizeof(cac_time), "N/A");
+
+
 		/*
 		 * There may not be documentation for max antenna gain
 		 * in certain regions
 		 */
 		if (power_rule->max_antenna_gain)
-			pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm)\n",
+			pr_info("  (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				bw,
 				power_rule->max_antenna_gain,
-				power_rule->max_eirp);
+				power_rule->max_eirp,
+				cac_time);
 		else
-			pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm)\n",
+			pr_info("  (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				bw,
-				power_rule->max_eirp);
+				power_rule->max_eirp,
+				cac_time);
 	}
 }
 
-- 
cgit v1.2.3


From 1ef3e2bc04223ff956dc62abaf2dff1f3322a431 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 26 Feb 2014 11:38:36 -0700
Subject: vfio/iommu_type1: Multi-IOMMU domain support

We currently have a problem that we cannot support advanced features
of an IOMMU domain (ex. IOMMU_CACHE), because we have no guarantee
that those features will be supported by all of the hardware units
involved with the domain over its lifetime.  For instance, the Intel
VT-d architecture does not require that all DRHDs support snoop
control.  If we create a domain based on a device behind a DRHD that
does support snoop control and enable SNP support via the IOMMU_CACHE
mapping option, we cannot then add a device behind a DRHD which does
not support snoop control or we'll get reserved bit faults from the
SNP bit in the pagetables.  To add to the complexity, we can't know
the properties of a domain until a device is attached.

We could pass this problem off to userspace and require that a
separate vfio container be used, but we don't know how to handle page
accounting in that case.  How do we know that a page pinned in one
container is the same page as one pinned in a different container, and
so avoid double billing the user for the page?

The solution is therefore to support multiple IOMMU domains per
container.  In the majority of cases, only one domain will be required
since hardware is typically consistent within a system.  However, this
provides us the ability to validate compatibility of domains and
support mixed environments where page table flags can be different
between domains.

To do this, our DMA tracking needs to change.  We currently try to
coalesce user mappings into as few tracking entries as possible.  The
problem then becomes that we lose granularity of user mappings.  We've
never guaranteed that a user is able to unmap at a finer granularity
than the original mapping, but we must honor the granularity of the
original mapping.  This coalescing code is therefore removed, allowing
only unmaps covering complete maps.  The change in accounting is
fairly small here, a typical QEMU VM will start out with roughly a
dozen entries, so it's arguable if this coalescing was ever needed.

We also move IOMMU domain creation to the point where a group is
attached to the container.  An interesting side-effect of this is that
we now have access to the device at the time of domain creation and
can probe the devices within the group to determine the bus_type.
This finally makes vfio_iommu_type1 completely device/bus agnostic.
In fact, each IOMMU domain can host devices on different buses managed
by different physical IOMMUs, and present a single DMA mapping
interface to the user.  When a new domain is created, mappings are
replayed to bring the IOMMU pagetables up to the state of the current
container.  And of course, DMA mapping and unmapping automatically
traverse all of the configured IOMMU domains.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: Varun Sethi <Varun.Sethi@freescale.com>
---
 drivers/vfio/vfio_iommu_type1.c | 637 +++++++++++++++++++++-------------------
 include/uapi/linux/vfio.h       |   1 +
 2 files changed, 336 insertions(+), 302 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4fb7a8f83c8a..8c7bb9befdab 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -30,7 +30,6 @@
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/pci.h>		/* pci_bus_type */
 #include <linux/rbtree.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -55,11 +54,17 @@ MODULE_PARM_DESC(disable_hugepages,
 		 "Disable VFIO IOMMU support for IOMMU hugepages.");
 
 struct vfio_iommu {
-	struct iommu_domain	*domain;
+	struct list_head	domain_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
+	bool v2;
+};
+
+struct vfio_domain {
+	struct iommu_domain	*domain;
+	struct list_head	next;
 	struct list_head	group_list;
-	bool			cache;
+	int			prot;		/* IOMMU_CACHE */
 };
 
 struct vfio_dma {
@@ -99,7 +104,7 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
 	return NULL;
 }
 
-static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
+static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 {
 	struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
 	struct vfio_dma *dma;
@@ -118,7 +123,7 @@ static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 	rb_insert_color(&new->node, &iommu->dma_list);
 }
 
-static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
+static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
 {
 	rb_erase(&old->node, &iommu->dma_list);
 }
@@ -322,32 +327,39 @@ static long vfio_unpin_pages(unsigned long pfn, long npage,
 	return unlocked;
 }
 
-static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
-			    dma_addr_t iova, size_t *size)
+static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 {
-	dma_addr_t start = iova, end = iova + *size;
+	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
+	struct vfio_domain *domain, *d;
 	long unlocked = 0;
 
+	if (!dma->size)
+		return;
+	/*
+	 * We use the IOMMU to track the physical addresses, otherwise we'd
+	 * need a much more complicated tracking system.  Unfortunately that
+	 * means we need to use one of the iommu domains to figure out the
+	 * pfns to unpin.  The rest need to be unmapped in advance so we have
+	 * no iommu translations remaining when the pages are unpinned.
+	 */
+	domain = d = list_first_entry(&iommu->domain_list,
+				      struct vfio_domain, next);
+
+	list_for_each_entry_continue(d, &iommu->domain_list, next)
+		iommu_unmap(d->domain, dma->iova, dma->size);
+
 	while (iova < end) {
 		size_t unmapped;
 		phys_addr_t phys;
 
-		/*
-		 * We use the IOMMU to track the physical address.  This
-		 * saves us from having a lot more entries in our mapping
-		 * tree.  The downside is that we don't track the size
-		 * used to do the mapping.  We request unmap of a single
-		 * page, but expect IOMMUs that support large pages to
-		 * unmap a larger chunk.
-		 */
-		phys = iommu_iova_to_phys(iommu->domain, iova);
+		phys = iommu_iova_to_phys(domain->domain, iova);
 		if (WARN_ON(!phys)) {
 			iova += PAGE_SIZE;
 			continue;
 		}
 
-		unmapped = iommu_unmap(iommu->domain, iova, PAGE_SIZE);
-		if (!unmapped)
+		unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+		if (WARN_ON(!unmapped))
 			break;
 
 		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
@@ -357,119 +369,26 @@ static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	}
 
 	vfio_lock_acct(-unlocked);
-
-	*size = iova - start;
-
-	return 0;
 }
 
-static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
-				   size_t *size, struct vfio_dma *dma)
+static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 {
-	size_t offset, overlap, tmp;
-	struct vfio_dma *split;
-	int ret;
-
-	if (!*size)
-		return 0;
-
-	/*
-	 * Existing dma region is completely covered, unmap all.  This is
-	 * the likely case since userspace tends to map and unmap buffers
-	 * in one shot rather than multiple mappings within a buffer.
-	 */
-	if (likely(start <= dma->iova &&
-		   start + *size >= dma->iova + dma->size)) {
-		*size = dma->size;
-		ret = vfio_unmap_unpin(iommu, dma, dma->iova, size);
-		if (ret)
-			return ret;
-
-		/*
-		 * Did we remove more than we have?  Should never happen
-		 * since a vfio_dma is contiguous in iova and vaddr.
-		 */
-		WARN_ON(*size != dma->size);
-
-		vfio_remove_dma(iommu, dma);
-		kfree(dma);
-		return 0;
-	}
-
-	/* Overlap low address of existing range */
-	if (start <= dma->iova) {
-		overlap = start + *size - dma->iova;
-		ret = vfio_unmap_unpin(iommu, dma, dma->iova, &overlap);
-		if (ret)
-			return ret;
-
-		vfio_remove_dma(iommu, dma);
-
-		/*
-		 * Check, we may have removed to whole vfio_dma.  If not
-		 * fixup and re-insert.
-		 */
-		if (overlap < dma->size) {
-			dma->iova += overlap;
-			dma->vaddr += overlap;
-			dma->size -= overlap;
-			vfio_insert_dma(iommu, dma);
-		} else
-			kfree(dma);
-
-		*size = overlap;
-		return 0;
-	}
-
-	/* Overlap high address of existing range */
-	if (start + *size >= dma->iova + dma->size) {
-		offset = start - dma->iova;
-		overlap = dma->size - offset;
-
-		ret = vfio_unmap_unpin(iommu, dma, start, &overlap);
-		if (ret)
-			return ret;
-
-		dma->size -= overlap;
-		*size = overlap;
-		return 0;
-	}
-
-	/* Split existing */
-
-	/*
-	 * Allocate our tracking structure early even though it may not
-	 * be used.  An Allocation failure later loses track of pages and
-	 * is more difficult to unwind.
-	 */
-	split = kzalloc(sizeof(*split), GFP_KERNEL);
-	if (!split)
-		return -ENOMEM;
-
-	offset = start - dma->iova;
-
-	ret = vfio_unmap_unpin(iommu, dma, start, size);
-	if (ret || !*size) {
-		kfree(split);
-		return ret;
-	}
-
-	tmp = dma->size;
+	vfio_unmap_unpin(iommu, dma);
+	vfio_unlink_dma(iommu, dma);
+	kfree(dma);
+}
 
-	/* Resize the lower vfio_dma in place, before the below insert */
-	dma->size = offset;
+static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
+{
+	struct vfio_domain *domain;
+	unsigned long bitmap = PAGE_MASK;
 
-	/* Insert new for remainder, assuming it didn't all get unmapped */
-	if (likely(offset + *size < tmp)) {
-		split->size = tmp - offset - *size;
-		split->iova = dma->iova + offset + *size;
-		split->vaddr = dma->vaddr + offset + *size;
-		split->prot = dma->prot;
-		vfio_insert_dma(iommu, split);
-	} else
-		kfree(split);
+	mutex_lock(&iommu->lock);
+	list_for_each_entry(domain, &iommu->domain_list, next)
+		bitmap &= domain->domain->ops->pgsize_bitmap;
+	mutex_unlock(&iommu->lock);
 
-	return 0;
+	return bitmap;
 }
 
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
@@ -477,10 +396,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 {
 	uint64_t mask;
 	struct vfio_dma *dma;
-	size_t unmapped = 0, size;
+	size_t unmapped = 0;
 	int ret = 0;
 
-	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;
+	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
 
 	if (unmap->iova & mask)
 		return -EINVAL;
@@ -491,20 +410,61 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 
 	mutex_lock(&iommu->lock);
 
+	/*
+	 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
+	 * avoid tracking individual mappings.  This means that the granularity
+	 * of the original mapping was lost and the user was allowed to attempt
+	 * to unmap any range.  Depending on the contiguousness of physical
+	 * memory and page sizes supported by the IOMMU, arbitrary unmaps may
+	 * or may not have worked.  We only guaranteed unmap granularity
+	 * matching the original mapping; even though it was untracked here,
+	 * the original mappings are reflected in IOMMU mappings.  This
+	 * resulted in a couple unusual behaviors.  First, if a range is not
+	 * able to be unmapped, ex. a set of 4k pages that was mapped as a
+	 * 2M hugepage into the IOMMU, the unmap ioctl returns success but with
+	 * a zero sized unmap.  Also, if an unmap request overlaps the first
+	 * address of a hugepage, the IOMMU will unmap the entire hugepage.
+	 * This also returns success and the returned unmap size reflects the
+	 * actual size unmapped.
+	 *
+	 * We attempt to maintain compatibility with this "v1" interface, but
+	 * we take control out of the hands of the IOMMU.  Therefore, an unmap
+	 * request offset from the beginning of the original mapping will
+	 * return success with zero sized unmap.  And an unmap request covering
+	 * the first iova of mapping will unmap the entire range.
+	 *
+	 * The v2 version of this interface intends to be more deterministic.
+	 * Unmap requests must fully cover previous mappings.  Multiple
+	 * mappings may still be unmapped by specifying large ranges, but there
+	 * must not be any previous mappings bisected by the range.  An error
+	 * will be returned if these conditions are not met.  The v2 interface
+	 * will only return success and a size of zero if there were no
+	 * mappings within the range.
+	 */
+	if (iommu->v2) {
+		dma = vfio_find_dma(iommu, unmap->iova, 0);
+		if (dma && dma->iova != unmap->iova) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+		dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
+		if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+	}
+
 	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-		size = unmap->size;
-		ret = vfio_remove_dma_overlap(iommu, unmap->iova, &size, dma);
-		if (ret || !size)
+		if (!iommu->v2 && unmap->iova > dma->iova)
 			break;
-		unmapped += size;
+		unmapped += dma->size;
+		vfio_remove_dma(iommu, dma);
 	}
 
+unlock:
 	mutex_unlock(&iommu->lock);
 
-	/*
-	 * We may unmap more than requested, update the unmap struct so
-	 * userspace can know.
-	 */
+	/* Report how much was unmapped */
 	unmap->size = unmapped;
 
 	return ret;
@@ -516,22 +476,47 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
  * soon, so this is just a temporary workaround to break mappings down into
  * PAGE_SIZE.  Better to map smaller pages than nothing.
  */
-static int map_try_harder(struct vfio_iommu *iommu, dma_addr_t iova,
+static int map_try_harder(struct vfio_domain *domain, dma_addr_t iova,
 			  unsigned long pfn, long npage, int prot)
 {
 	long i;
 	int ret;
 
 	for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
-		ret = iommu_map(iommu->domain, iova,
+		ret = iommu_map(domain->domain, iova,
 				(phys_addr_t)pfn << PAGE_SHIFT,
-				PAGE_SIZE, prot);
+				PAGE_SIZE, prot | domain->prot);
 		if (ret)
 			break;
 	}
 
 	for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
-		iommu_unmap(iommu->domain, iova, PAGE_SIZE);
+		iommu_unmap(domain->domain, iova, PAGE_SIZE);
+
+	return ret;
+}
+
+static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
+			  unsigned long pfn, long npage, int prot)
+{
+	struct vfio_domain *d;
+	int ret;
+
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
+				npage << PAGE_SHIFT, prot | d->prot);
+		if (ret) {
+			if (ret != -EBUSY ||
+			    map_try_harder(d, iova, pfn, npage, prot))
+				goto unwind;
+		}
+	}
+
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(d, &iommu->domain_list, next)
+		iommu_unmap(d->domain, iova, npage << PAGE_SHIFT);
 
 	return ret;
 }
@@ -545,12 +530,12 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	long npage;
 	int ret = 0, prot = 0;
 	uint64_t mask;
-	struct vfio_dma *dma = NULL;
+	struct vfio_dma *dma;
 	unsigned long pfn;
 
 	end = map->iova + map->size;
 
-	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;
+	mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
 
 	/* READ/WRITE from device perspective */
 	if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
@@ -561,9 +546,6 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (!prot)
 		return -EINVAL; /* No READ/WRITE? */
 
-	if (iommu->cache)
-		prot |= IOMMU_CACHE;
-
 	if (vaddr & mask)
 		return -EINVAL;
 	if (map->iova & mask)
@@ -588,180 +570,257 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 		return -EEXIST;
 	}
 
-	for (iova = map->iova; iova < end; iova += size, vaddr += size) {
-		long i;
+	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
+	if (!dma) {
+		mutex_unlock(&iommu->lock);
+		return -ENOMEM;
+	}
+
+	dma->iova = map->iova;
+	dma->vaddr = map->vaddr;
+	dma->prot = prot;
 
+	/* Insert zero-sized and grow as we map chunks of it */
+	vfio_link_dma(iommu, dma);
+
+	for (iova = map->iova; iova < end; iova += size, vaddr += size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages(vaddr, (end - iova) >> PAGE_SHIFT,
 				       prot, &pfn);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
-			goto out;
-		}
-
-		/* Verify pages are not already mapped */
-		for (i = 0; i < npage; i++) {
-			if (iommu_iova_to_phys(iommu->domain,
-					       iova + (i << PAGE_SHIFT))) {
-				ret = -EBUSY;
-				goto out_unpin;
-			}
+			break;
 		}
 
-		ret = iommu_map(iommu->domain, iova,
-				(phys_addr_t)pfn << PAGE_SHIFT,
-				npage << PAGE_SHIFT, prot);
+		/* Map it! */
+		ret = vfio_iommu_map(iommu, iova, pfn, npage, prot);
 		if (ret) {
-			if (ret != -EBUSY ||
-			    map_try_harder(iommu, iova, pfn, npage, prot)) {
-				goto out_unpin;
-			}
+			vfio_unpin_pages(pfn, npage, prot, true);
+			break;
 		}
 
 		size = npage << PAGE_SHIFT;
+		dma->size += size;
+	}
 
-		/*
-		 * Check if we abut a region below - nothing below 0.
-		 * This is the most likely case when mapping chunks of
-		 * physically contiguous regions within a virtual address
-		 * range.  Update the abutting entry in place since iova
-		 * doesn't change.
-		 */
-		if (likely(iova)) {
-			struct vfio_dma *tmp;
-			tmp = vfio_find_dma(iommu, iova - 1, 1);
-			if (tmp && tmp->prot == prot &&
-			    tmp->vaddr + tmp->size == vaddr) {
-				tmp->size += size;
-				iova = tmp->iova;
-				size = tmp->size;
-				vaddr = tmp->vaddr;
-				dma = tmp;
-			}
-		}
+	if (ret)
+		vfio_remove_dma(iommu, dma);
 
-		/*
-		 * Check if we abut a region above - nothing above ~0 + 1.
-		 * If we abut above and below, remove and free.  If only
-		 * abut above, remove, modify, reinsert.
-		 */
-		if (likely(iova + size)) {
-			struct vfio_dma *tmp;
-			tmp = vfio_find_dma(iommu, iova + size, 1);
-			if (tmp && tmp->prot == prot &&
-			    tmp->vaddr == vaddr + size) {
-				vfio_remove_dma(iommu, tmp);
-				if (dma) {
-					dma->size += tmp->size;
-					kfree(tmp);
-				} else {
-					size += tmp->size;
-					tmp->size = size;
-					tmp->iova = iova;
-					tmp->vaddr = vaddr;
-					vfio_insert_dma(iommu, tmp);
-					dma = tmp;
-				}
-			}
-		}
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
+static int vfio_bus_type(struct device *dev, void *data)
+{
+	struct bus_type **bus = data;
+
+	if (*bus && *bus != dev->bus)
+		return -EINVAL;
+
+	*bus = dev->bus;
+
+	return 0;
+}
+
+static int vfio_iommu_replay(struct vfio_iommu *iommu,
+			     struct vfio_domain *domain)
+{
+	struct vfio_domain *d;
+	struct rb_node *n;
+	int ret;
+
+	/* Arbitrarily pick the first domain in the list for lookups */
+	d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+	n = rb_first(&iommu->dma_list);
+
+	/* If there's not a domain, there better not be any mappings */
+	if (WARN_ON(n && !d))
+		return -EINVAL;
+
+	for (; n; n = rb_next(n)) {
+		struct vfio_dma *dma;
+		dma_addr_t iova;
+
+		dma = rb_entry(n, struct vfio_dma, node);
+		iova = dma->iova;
+
+		while (iova < dma->iova + dma->size) {
+			phys_addr_t phys = iommu_iova_to_phys(d->domain, iova);
+			size_t size;
 
-		if (!dma) {
-			dma = kzalloc(sizeof(*dma), GFP_KERNEL);
-			if (!dma) {
-				iommu_unmap(iommu->domain, iova, size);
-				ret = -ENOMEM;
-				goto out_unpin;
+			if (WARN_ON(!phys)) {
+				iova += PAGE_SIZE;
+				continue;
 			}
 
-			dma->size = size;
-			dma->iova = iova;
-			dma->vaddr = vaddr;
-			dma->prot = prot;
-			vfio_insert_dma(iommu, dma);
-		}
-	}
+			size = PAGE_SIZE;
 
-	WARN_ON(ret);
-	mutex_unlock(&iommu->lock);
-	return ret;
+			while (iova + size < dma->iova + dma->size &&
+			       phys + size == iommu_iova_to_phys(d->domain,
+								 iova + size))
+				size += PAGE_SIZE;
 
-out_unpin:
-	vfio_unpin_pages(pfn, npage, prot, true);
+			ret = iommu_map(domain->domain, iova, phys,
+					size, dma->prot | domain->prot);
+			if (ret)
+				return ret;
 
-out:
-	iova = map->iova;
-	size = map->size;
-	while ((dma = vfio_find_dma(iommu, iova, size))) {
-		int r = vfio_remove_dma_overlap(iommu, iova,
-						&size, dma);
-		if (WARN_ON(r || !size))
-			break;
+			iova += size;
+		}
 	}
 
-	mutex_unlock(&iommu->lock);
-	return ret;
+	return 0;
 }
 
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
 	struct vfio_iommu *iommu = iommu_data;
-	struct vfio_group *group, *tmp;
+	struct vfio_group *group, *g;
+	struct vfio_domain *domain, *d;
+	struct bus_type *bus = NULL;
 	int ret;
 
-	group = kzalloc(sizeof(*group), GFP_KERNEL);
-	if (!group)
-		return -ENOMEM;
-
 	mutex_lock(&iommu->lock);
 
-	list_for_each_entry(tmp, &iommu->group_list, next) {
-		if (tmp->iommu_group == iommu_group) {
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		list_for_each_entry(g, &d->group_list, next) {
+			if (g->iommu_group != iommu_group)
+				continue;
+
 			mutex_unlock(&iommu->lock);
-			kfree(group);
 			return -EINVAL;
 		}
 	}
 
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!group || !domain) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	group->iommu_group = iommu_group;
+
+	/* Determine bus_type in order to allocate a domain */
+	ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
+	if (ret)
+		goto out_free;
+
+	domain->domain = iommu_domain_alloc(bus);
+	if (!domain->domain) {
+		ret = -EIO;
+		goto out_free;
+	}
+
+	ret = iommu_attach_group(domain->domain, iommu_group);
+	if (ret)
+		goto out_domain;
+
+	INIT_LIST_HEAD(&domain->group_list);
+	list_add(&group->next, &domain->group_list);
+
+	if (!allow_unsafe_interrupts &&
+	    !iommu_domain_has_cap(domain->domain, IOMMU_CAP_INTR_REMAP)) {
+		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
+		       __func__);
+		ret = -EPERM;
+		goto out_detach;
+	}
+
+	if (iommu_domain_has_cap(domain->domain, IOMMU_CAP_CACHE_COHERENCY))
+		domain->prot |= IOMMU_CACHE;
+
 	/*
-	 * TODO: Domain have capabilities that might change as we add
-	 * groups (see iommu->cache, currently never set).  Check for
-	 * them and potentially disallow groups to be attached when it
-	 * would change capabilities (ugh).
+	 * Try to match an existing compatible domain.  We don't want to
+	 * preclude an IOMMU driver supporting multiple bus_types and being
+	 * able to include different bus_types in the same IOMMU domain, so
+	 * we test whether the domains use the same iommu_ops rather than
+	 * testing if they're on the same bus_type.
 	 */
-	ret = iommu_attach_group(iommu->domain, iommu_group);
-	if (ret) {
-		mutex_unlock(&iommu->lock);
-		kfree(group);
-		return ret;
+	list_for_each_entry(d, &iommu->domain_list, next) {
+		if (d->domain->ops == domain->domain->ops &&
+		    d->prot == domain->prot) {
+			iommu_detach_group(domain->domain, iommu_group);
+			if (!iommu_attach_group(d->domain, iommu_group)) {
+				list_add(&group->next, &d->group_list);
+				iommu_domain_free(domain->domain);
+				kfree(domain);
+				mutex_unlock(&iommu->lock);
+				return 0;
+			}
+
+			ret = iommu_attach_group(domain->domain, iommu_group);
+			if (ret)
+				goto out_domain;
+		}
 	}
 
-	group->iommu_group = iommu_group;
-	list_add(&group->next, &iommu->group_list);
+	/* replay mappings on new domains */
+	ret = vfio_iommu_replay(iommu, domain);
+	if (ret)
+		goto out_detach;
+
+	list_add(&domain->next, &iommu->domain_list);
 
 	mutex_unlock(&iommu->lock);
 
 	return 0;
+
+out_detach:
+	iommu_detach_group(domain->domain, iommu_group);
+out_domain:
+	iommu_domain_free(domain->domain);
+out_free:
+	kfree(domain);
+	kfree(group);
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(&iommu->dma_list)))
+		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
 }
 
 static void vfio_iommu_type1_detach_group(void *iommu_data,
 					  struct iommu_group *iommu_group)
 {
 	struct vfio_iommu *iommu = iommu_data;
+	struct vfio_domain *domain;
 	struct vfio_group *group;
 
 	mutex_lock(&iommu->lock);
 
-	list_for_each_entry(group, &iommu->group_list, next) {
-		if (group->iommu_group == iommu_group) {
-			iommu_detach_group(iommu->domain, iommu_group);
+	list_for_each_entry(domain, &iommu->domain_list, next) {
+		list_for_each_entry(group, &domain->group_list, next) {
+			if (group->iommu_group != iommu_group)
+				continue;
+
+			iommu_detach_group(domain->domain, iommu_group);
 			list_del(&group->next);
 			kfree(group);
-			break;
+			/*
+			 * Group ownership provides privilege, if the group
+			 * list is empty, the domain goes away.  If it's the
+			 * last domain, then all the mappings go away too.
+			 */
+			if (list_empty(&domain->group_list)) {
+				if (list_is_singular(&iommu->domain_list))
+					vfio_iommu_unmap_unpin_all(iommu);
+				iommu_domain_free(domain->domain);
+				list_del(&domain->next);
+				kfree(domain);
+			}
+			goto done;
 		}
 	}
 
+done:
 	mutex_unlock(&iommu->lock);
 }
 
@@ -769,40 +828,17 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 {
 	struct vfio_iommu *iommu;
 
-	if (arg != VFIO_TYPE1_IOMMU)
+	if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
 		return ERR_PTR(-EINVAL);
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&iommu->group_list);
+	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
-
-	/*
-	 * Wish we didn't have to know about bus_type here.
-	 */
-	iommu->domain = iommu_domain_alloc(&pci_bus_type);
-	if (!iommu->domain) {
-		kfree(iommu);
-		return ERR_PTR(-EIO);
-	}
-
-	/*
-	 * Wish we could specify required capabilities rather than create
-	 * a domain, see what comes out and hope it doesn't change along
-	 * the way.  Fortunately we know interrupt remapping is global for
-	 * our iommus.
-	 */
-	if (!allow_unsafe_interrupts &&
-	    !iommu_domain_has_cap(iommu->domain, IOMMU_CAP_INTR_REMAP)) {
-		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
-		       __func__);
-		iommu_domain_free(iommu->domain);
-		kfree(iommu);
-		return ERR_PTR(-EPERM);
-	}
+	iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
 
 	return iommu;
 }
@@ -810,25 +846,24 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 static void vfio_iommu_type1_release(void *iommu_data)
 {
 	struct vfio_iommu *iommu = iommu_data;
+	struct vfio_domain *domain, *domain_tmp;
 	struct vfio_group *group, *group_tmp;
-	struct rb_node *node;
 
-	list_for_each_entry_safe(group, group_tmp, &iommu->group_list, next) {
-		iommu_detach_group(iommu->domain, group->iommu_group);
-		list_del(&group->next);
-		kfree(group);
-	}
+	vfio_iommu_unmap_unpin_all(iommu);
 
-	while ((node = rb_first(&iommu->dma_list))) {
-		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
-		size_t size = dma->size;
-		vfio_remove_dma_overlap(iommu, dma->iova, &size, dma);
-		if (WARN_ON(!size))
-			break;
+	list_for_each_entry_safe(domain, domain_tmp,
+				 &iommu->domain_list, next) {
+		list_for_each_entry_safe(group, group_tmp,
+					 &domain->group_list, next) {
+			iommu_detach_group(domain->domain, group->iommu_group);
+			list_del(&group->next);
+			kfree(group);
+		}
+		iommu_domain_free(domain->domain);
+		list_del(&domain->next);
+		kfree(domain);
 	}
 
-	iommu_domain_free(iommu->domain);
-	iommu->domain = NULL;
 	kfree(iommu);
 }
 
@@ -841,6 +876,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 	if (cmd == VFIO_CHECK_EXTENSION) {
 		switch (arg) {
 		case VFIO_TYPE1_IOMMU:
+		case VFIO_TYPE1v2_IOMMU:
 			return 1;
 		default:
 			return 0;
@@ -858,7 +894,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 
 		info.flags = 0;
 
-		info.iova_pgsizes = iommu->domain->ops->pgsize_bitmap;
+		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
 		return copy_to_user((void __user *)arg, &info, minsz);
 
@@ -911,9 +947,6 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
 
 static int __init vfio_iommu_type1_init(void)
 {
-	if (!iommu_present(&pci_bus_type))
-		return -ENODEV;
-
 	return vfio_register_iommu_driver(&vfio_iommu_driver_ops_type1);
 }
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 0fd47f5bc146..460fdf2e26f1 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -23,6 +23,7 @@
 
 #define VFIO_TYPE1_IOMMU		1
 #define VFIO_SPAPR_TCE_IOMMU		2
+#define VFIO_TYPE1v2_IOMMU		3
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
-- 
cgit v1.2.3


From aa429318279b90192f35a97e9ccdc1e83b3a9624 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 26 Feb 2014 11:38:37 -0700
Subject: vfio/type1: Add extension to test DMA cache coherence of IOMMU

Now that the type1 IOMMU backend can support IOMMU_CACHE, we need to
be able to test whether coherency is currently enforced.  Add an
extension for this.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 21 +++++++++++++++++++++
 include/uapi/linux/vfio.h       |  5 +++++
 2 files changed, 26 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 8c7bb9befdab..1f90344d3e2c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -867,6 +867,23 @@ static void vfio_iommu_type1_release(void *iommu_data)
 	kfree(iommu);
 }
 
+static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+{
+	struct vfio_domain *domain;
+	int ret = 1;
+
+	mutex_lock(&iommu->lock);
+	list_for_each_entry(domain, &iommu->domain_list, next) {
+		if (!(domain->prot & IOMMU_CACHE)) {
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&iommu->lock);
+
+	return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
 				   unsigned int cmd, unsigned long arg)
 {
@@ -878,6 +895,10 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 		case VFIO_TYPE1_IOMMU:
 		case VFIO_TYPE1v2_IOMMU:
 			return 1;
+		case VFIO_DMA_CC_IOMMU:
+			if (!iommu)
+				return 0;
+			return vfio_domains_have_iommu_cache(iommu);
 		default:
 			return 0;
 		}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 460fdf2e26f1..cb9023d4f063 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -24,6 +24,11 @@
 #define VFIO_TYPE1_IOMMU		1
 #define VFIO_SPAPR_TCE_IOMMU		2
 #define VFIO_TYPE1v2_IOMMU		3
+/*
+ * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping).  This
+ * capability is subject to change as groups are added or removed.
+ */
+#define VFIO_DMA_CC_IOMMU		4
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
-- 
cgit v1.2.3


From 2ebe21fdde1c92ef1654f23d29194145be0e1ddc Mon Sep 17 00:00:00 2001
From: Neil Jerram <Neil.Jerram@metaswitch.com>
Date: Tue, 25 Feb 2014 11:17:25 +0000
Subject: net: order MPLS ethertypes numerically

All ethertypes other than ETH_P_MPLS_UC, ETH_P_MPLS_MC and
ETH_P_ATMMPOA were already ordered numerically.  This commit moves
those three ETH_P_... values into correct numerical order too.

Signed-off-by: Neil Jerram <Neil.Jerram@metaswitch.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 2ce0f6a78fa5..ba8b15f07940 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -68,11 +68,11 @@
 #define ETH_P_SLOW	0x8809		/* Slow Protocol. See 802.3ad 43B */
 #define ETH_P_WCCP	0x883E		/* Web-cache coordination protocol
 					 * defined in draft-wilson-wrec-wccp-v2-00.txt */
-#define ETH_P_PPP_DISC	0x8863		/* PPPoE discovery messages     */
-#define ETH_P_PPP_SES	0x8864		/* PPPoE session messages	*/
 #define ETH_P_MPLS_UC	0x8847		/* MPLS Unicast traffic		*/
 #define ETH_P_MPLS_MC	0x8848		/* MPLS Multicast traffic	*/
 #define ETH_P_ATMMPOA	0x884c		/* MultiProtocol Over ATM	*/
+#define ETH_P_PPP_DISC	0x8863		/* PPPoE discovery messages     */
+#define ETH_P_PPP_SES	0x8864		/* PPPoE session messages	*/
 #define ETH_P_LINK_CTL	0x886c		/* HPNA, wlan link local tunnel */
 #define ETH_P_ATMFATE	0x8884		/* Frame-based ATM Transport
 					 * over Ethernet
-- 
cgit v1.2.3


From 8e165e20348b53583f6c6cb45865b89209f4aa13 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 25 Feb 2014 14:34:32 +0100
Subject: net: tcp: add mib counters to track zero window transitions

Three counters are added:
- one to track when we went from non-zero to zero window
- one to track the reverse
- one counter incremented when we want to announce zero window,
  but can't because we would shrink current window.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h |  3 +++
 net/ipv4/proc.c           |  3 +++
 net/ipv4/tcp_output.c     | 12 +++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index bbaba22f2d1b..8d64a7e5d371 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -259,6 +259,9 @@ enum
 	LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */
 	LINUX_MIB_BUSYPOLLRXPACKETS,		/* BusyPollRxPackets */
 	LINUX_MIB_TCPAUTOCORKING,		/* TCPAutoCorking */
+	LINUX_MIB_TCPFROMZEROWINDOWADV,		/* TCPFromZeroWindowAdv */
+	LINUX_MIB_TCPTOZEROWINDOWADV,		/* TCPToZeroWindowAdv */
+	LINUX_MIB_TCPWANTZEROWINDOWADV,		/* TCPWantZeroWindowAdv */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a6c8a80ec9d6..99d2e9b6fac9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -280,6 +280,9 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
 	SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
 	SNMP_MIB_ITEM("TCPAutoCorking", LINUX_MIB_TCPAUTOCORKING),
+	SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV),
+	SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV),
+	SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 21e8a9f33287..c5eadec001c1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -269,6 +269,7 @@ EXPORT_SYMBOL(tcp_select_initial_window);
 static u16 tcp_select_window(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 old_win = tp->rcv_wnd;
 	u32 cur_win = tcp_receive_window(tp);
 	u32 new_win = __tcp_select_window(sk);
 
@@ -281,6 +282,9 @@ static u16 tcp_select_window(struct sock *sk)
 		 *
 		 * Relax Will Robinson.
 		 */
+		if (new_win == 0)
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPWANTZEROWINDOWADV);
 		new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
 	}
 	tp->rcv_wnd = new_win;
@@ -298,8 +302,14 @@ static u16 tcp_select_window(struct sock *sk)
 	new_win >>= tp->rx_opt.rcv_wscale;
 
 	/* If we advertise zero window, disable fast path. */
-	if (new_win == 0)
+	if (new_win == 0) {
 		tp->pred_flags = 0;
+		if (old_win)
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPTOZEROWINDOWADV);
+	} else if (old_win == 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
+	}
 
 	return new_win;
 }
-- 
cgit v1.2.3


From 1b346576359c72bee34b1476b4fc63d77d37b314 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 26 Feb 2014 01:20:42 +0100
Subject: ipv4: yet another new IP_MTU_DISCOVER option IP_PMTUDISC_OMIT

IP_PMTUDISC_INTERFACE has a design error: because it does not allow the
generation of fragments if the interface mtu is exceeded, it is very
hard to make use of this option in already deployed name server software
for which I introduced this option.

This patch adds yet another new IP_MTU_DISCOVER option which does not honor
any path mtu information and does not accept new icmp notifications destined
for the socket this option is enabled on. But we allow outgoing fragmentation
in case the packet size exceeds the outgoing interface mtu.

As such this new option can be used as a drop-in replacement for
IP_PMTUDISC_DONT, which is currently in use by most name server software
making the adoption of this option very smooth and easy.

The original advantage of IP_PMTUDISC_INTERFACE is still maintained:
ignoring incoming path MTU updates and not honoring discovered path MTUs
in the output path.

Fixes: 482fc6094afad5 ("ipv4: introduce new IP_MTU_DISCOVER mode IP_PMTUDISC_INTERFACE")
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h        | 9 ++++++++-
 include/uapi/linux/in.h | 4 ++++
 net/ipv4/ip_output.c    | 9 +++------
 net/ipv4/ip_sockglue.c  | 2 +-
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip.h b/include/net/ip.h
index 4aa781b7f609..b885d75cede4 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -266,7 +266,8 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 
 static inline bool ip_sk_accept_pmtu(const struct sock *sk)
 {
-	return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE;
+	return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
+	       inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
 }
 
 static inline bool ip_sk_use_pmtu(const struct sock *sk)
@@ -274,6 +275,12 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk)
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
 }
 
+static inline bool ip_sk_local_df(const struct sock *sk)
+{
+	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
+	       inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
+}
+
 static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 						    bool forwarding)
 {
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 393c5de09d42..c33a65e3d62c 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -120,6 +120,10 @@ struct in_addr {
  * this socket to prevent accepting spoofed ones.
  */
 #define IP_PMTUDISC_INTERFACE		4
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
+ * fragmented if they exceed the interface mtu
+ */
+#define IP_PMTUDISC_OMIT		5
 
 #define IP_MULTICAST_IF			32
 #define IP_MULTICAST_TTL 		33
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 22aa11971ed1..e85445b2b102 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -824,8 +824,7 @@ static int __ip_append_data(struct sock *sk,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
-			 mtu : 0xFFFF;
+	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + length > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1148,8 +1147,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = (inet->pmtudisc >= IP_PMTUDISC_DO) ?
-			 mtu : 0xFFFF;
+	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + size > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1310,8 +1308,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	if (inet->pmtudisc < IP_PMTUDISC_DO)
-		skb->local_df = 1;
+	skb->local_df = ip_sk_local_df(sk);
 
 	/* DF bit is set when we want to see DF on outgoing frames.
 	 * If local_df is set too, we still allow to fragment this frame
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0968b28c4cf3..64741b938632 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -643,7 +643,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		inet->nodefrag = val ? 1 : 0;
 		break;
 	case IP_MTU_DISCOVER:
-		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_INTERFACE)
+		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 			goto e_inval;
 		inet->pmtudisc = val;
 		break;
-- 
cgit v1.2.3


From 0b95227a7ba7e69f795757cd7c839eff0615f2d1 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 26 Feb 2014 01:20:43 +0100
Subject: ipv6: yet another new IPV6_MTU_DISCOVER option IPV6_PMTUDISC_OMIT

This option has the same semantics as IP_PMTUDISC_OMIT for IPv4, which
was recently introduced. It doesn't honor the path mtu discovered by the
host but, in contrast to IPV6_PMTUDISC_INTERFACE, allows the generation of
fragments if the packet size exceeds the MTU of the outgoing interface.

Fixes: 93b36cf3425b9b ("ipv6: support IPV6_PMTU_INTERFACE on sockets")
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h  | 9 ++++++++-
 include/uapi/linux/in6.h | 4 ++++
 net/ipv6/ip6_output.c    | 9 +++++----
 net/ipv6/ipv6_sockglue.c | 2 +-
 4 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 017badb1aec7..00e3f12cb2f9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -171,7 +171,14 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 
 static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
 {
-	return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE;
+	return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+}
+
+static inline bool ip6_sk_local_df(const struct sock *sk)
+{
+	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
+	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
 }
 
 static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index e9a1d2d973b6..0d8e0f0342dc 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -185,6 +185,10 @@ struct in6_flowlabel_req {
  * also see comments on IP_PMTUDISC_INTERFACE
  */
 #define IPV6_PMTUDISC_INTERFACE		4
+/* weaker version of IPV6_PMTUDISC_INTERFACE, which allows packets to
+ * get fragmented if they exceed the interface mtu
+ */
+#define IPV6_PMTUDISC_OMIT		5
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR	32
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 070a2fae2375..be1b7f5a3a54 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1234,8 +1234,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 			      sizeof(struct frag_hdr) : 0) +
 			     rt->rt6i_nfheader_len;
 
-		maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ?
-				 mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+		if (ip6_sk_local_df(sk))
+			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
+		else
+			maxnonfragsize = mtu;
 
 		/* dontfrag active */
 		if ((cork->length + length > mtu - headersize) && dontfrag &&
@@ -1543,8 +1545,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	/* Allow local fragmentation. */
-	if (np->pmtudisc < IPV6_PMTUDISC_DO)
-		skb->local_df = 1;
+	skb->local_df = ip6_sk_local_df(sk);
 
 	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0a00f449de5e..edb58aff4ae7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -722,7 +722,7 @@ done:
 	case IPV6_MTU_DISCOVER:
 		if (optlen < sizeof(int))
 			goto e_inval;
-		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_INTERFACE)
+		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
 			goto e_inval;
 		np->pmtudisc = val;
 		retv = 0;
-- 
cgit v1.2.3


From 740b0f1841f6e39085b711d41db9ffb07198682b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 26 Feb 2014 14:02:48 -0800
Subject: tcp: switch rtt estimations to usec resolution

Upcoming congestion controls for TCP require usec resolution for RTT
estimations. Millisecond resolution is simply not enough these days.

FQ/pacing in DC environments also requires this change for finer control
and removal of bimodal behavior due to the current hack in
tcp_update_pacing_rate() for 'small rtt'.

TCP_CONG_RTT_STAMP is no longer needed.

As Julian Anastasov pointed out, we need to keep user compatibility :
tcp_metrics used to export RTT and RTTVAR in msec resolution,
so we added RTT_US and RTTVAR_US. An iproute2 patch is needed
to use the new attributes if provided by the kernel.

In this example ss command displays a srtt of 32 usecs (10Gbit link)

lpk51:~# ./ss -i dst lpk52
Netid  State      Recv-Q Send-Q   Local Address:Port       Peer Address:Port
tcp    ESTAB      0      1         10.246.11.51:42959      10.246.11.52:64614
         cubic wscale:6,6 rto:201 rtt:0.032/0.001 ato:40 mss:1448 cwnd:10 send 3620.0Mbps pacing_rate 7240.0Mbps unacked:1 rcv_rtt:993 rcv_space:29559

Updated iproute2 ip command displays :

lpk51:~# ./ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 274us rttvar 213us source 10.246.11.51

Old binary displays :

lpk51:~# ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 250us rttvar 125us source 10.246.11.51

With help from Julian Anastasov, Stephen Hemminger and Yuchung Cheng

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Larry Brakmo <brakmo@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h              |   8 +-
 include/net/tcp.h                |  10 ++-
 include/uapi/linux/tcp_metrics.h |   7 +-
 net/ipv4/tcp.c                   |   8 +-
 net/ipv4/tcp_cubic.c             |   4 -
 net/ipv4/tcp_hybla.c             |  12 +--
 net/ipv4/tcp_illinois.c          |   1 -
 net/ipv4/tcp_input.c             | 183 +++++++++++++++++++--------------------
 net/ipv4/tcp_ipv4.c              |   2 +-
 net/ipv4/tcp_lp.c                |   1 -
 net/ipv4/tcp_metrics.c           |  83 ++++++++++--------
 net/ipv4/tcp_minisocks.c         |   4 +-
 net/ipv4/tcp_output.c            |  15 ++--
 net/ipv4/tcp_probe.c             |   2 +-
 net/ipv4/tcp_vegas.c             |   1 -
 net/ipv4/tcp_veno.c              |   1 -
 net/ipv4/tcp_yeah.c              |   1 -
 17 files changed, 174 insertions(+), 169 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4ad0706d40eb..239946868142 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,10 +201,10 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
-	u32	srtt;		/* smoothed round trip time << 3	*/
-	u32	mdev;		/* medium deviation			*/
-	u32	mdev_max;	/* maximal mdev for the last rtt period	*/
-	u32	rttvar;		/* smoothed mdev_max			*/
+	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
+	u32	mdev_us;	/* medium deviation			*/
+	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
+	u32	rttvar_us;	/* smoothed mdev_max			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f820537741a..93eab0b9da60 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt);
 #ifdef CONFIG_SYN_COOKIES
-#include <linux/ktime.h>
 
 /* Syncookies use a monotonic timer which increments every 64 seconds.
  * This counter is used both as a hash input and partially encoded into
@@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk)
 
 static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 {
-	return (tp->srtt >> 3) + tp->rttvar;
+	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk)
 	return rto_min;
 }
 
+static inline u32 tcp_rto_min_us(struct sock *sk)
+{
+	return jiffies_to_usecs(tcp_rto_min(sk));
+}
+
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
@@ -778,7 +783,6 @@ enum tcp_ca_event {
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
 #define TCP_CONG_NON_RESTRICTED 0x1
-#define TCP_CONG_RTT_STAMP	0x2
 
 struct tcp_congestion_ops {
 	struct list_head	list;
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 54a37b13f2c4..93533926035c 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -11,12 +11,15 @@
 #define TCP_METRICS_GENL_VERSION	0x1
 
 enum tcp_metric_index {
-	TCP_METRIC_RTT,
-	TCP_METRIC_RTTVAR,
+	TCP_METRIC_RTT,		/* in ms units */
+	TCP_METRIC_RTTVAR,	/* in ms units */
 	TCP_METRIC_SSTHRESH,
 	TCP_METRIC_CWND,
 	TCP_METRIC_REORDERING,
 
+	TCP_METRIC_RTT_US,	/* in usec units */
+	TCP_METRIC_RTTVAR_US,	/* in usec units */
+
 	/* Always last.  */
 	__TCP_METRIC_MAX,
 };
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bed379c7abcd..7374905b3701 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
-	tp->mdev = TCP_TIMEOUT_INIT;
+	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -2339,7 +2339,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
-	tp->srtt = 0;
+	tp->srtt_us = 0;
 	if ((tp->write_seq += tp->max_window + 2) == 0)
 		tp->write_seq = 1;
 	icsk->icsk_backoff = 0;
@@ -2783,8 +2783,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
-	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
-	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+	info->tcpi_rtt = tp->srtt_us >> 3;
+	info->tcpi_rttvar = tp->mdev_us >> 2;
 	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
 	info->tcpi_snd_cwnd = tp->snd_cwnd;
 	info->tcpi_advmss = tp->advmss;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffbaf..8bf224516ba2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -476,10 +476,6 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	/* hystart needs ms clock resolution */
-	if (hystart && HZ < 1000)
-		cubictcp.flags |= TCP_CONG_RTT_STAMP;
-
 	return tcp_register_congestion_control(&cubictcp);
 }
 
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 2a1a9e2a4e51..a15a799bf768 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -21,7 +21,7 @@ struct hybla {
 	u32   rho2;	      /* Rho * Rho, integer part */
 	u32   rho_3ls;	      /* Rho parameter, <<3 */
 	u32   rho2_7ls;	      /* Rho^2, <<7	*/
-	u32   minrtt;	      /* Minimum smoothed round trip time value seen */
+	u32   minrtt_us;      /* Minimum smoothed round trip time value seen */
 };
 
 /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
@@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk)
 {
 	struct hybla *ca = inet_csk_ca(sk);
 
-	ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
+	ca->rho_3ls = max_t(u32,
+			    tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
+			    8U);
 	ca->rho = ca->rho_3ls >> 3;
 	ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
 	ca->rho2 = ca->rho2_7ls >> 7;
@@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk)
 	hybla_recalc_param(sk);
 
 	/* set minimum rtt as this is the 1st ever seen */
-	ca->minrtt = tp->srtt;
+	ca->minrtt_us = tp->srtt_us;
 	tp->snd_cwnd = ca->rho;
 }
 
@@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 	int is_slowstart = 0;
 
 	/*  Recalculate rho only if this srtt is the lowest */
-	if (tp->srtt < ca->minrtt){
+	if (tp->srtt_us < ca->minrtt_us) {
 		hybla_recalc_param(sk);
-		ca->minrtt = tp->srtt;
+		ca->minrtt_us = tp->srtt_us;
 	}
 
 	if (!tcp_is_cwnd_limited(sk, in_flight))
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index be047c63ca10..863d105e3015 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -325,7 +325,6 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
 	.cong_avoid	= tcp_illinois_cong_avoid,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 227cba79fa6b..23a41d978fad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
+static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	long m = mrtt; /* RTT */
-	u32 srtt = tp->srtt;
+	long m = mrtt_us; /* RTT */
+	u32 srtt = tp->srtt_us;
 
 	/*	The following amusing code comes from Jacobson's
 	 *	article in SIGCOMM '88.  Note that rtt and mdev
@@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 			/* This is similar to one of Eifel findings.
 			 * Eifel blocks mdev updates when rtt decreases.
 			 * This solution is a bit different: we use finer gain
@@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 			if (m > 0)
 				m >>= 3;
 		} else {
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 		}
-		tp->mdev += m;	    	/* mdev = 3/4 mdev + 1/4 new */
-		if (tp->mdev > tp->mdev_max) {
-			tp->mdev_max = tp->mdev;
-			if (tp->mdev_max > tp->rttvar)
-				tp->rttvar = tp->mdev_max;
+		tp->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
+		if (tp->mdev_us > tp->mdev_max_us) {
+			tp->mdev_max_us = tp->mdev_us;
+			if (tp->mdev_max_us > tp->rttvar_us)
+				tp->rttvar_us = tp->mdev_max_us;
 		}
 		if (after(tp->snd_una, tp->rtt_seq)) {
-			if (tp->mdev_max < tp->rttvar)
-				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
+			if (tp->mdev_max_us < tp->rttvar_us)
+				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
-			tp->mdev_max = tcp_rto_min(sk);
+			tp->mdev_max_us = tcp_rto_min_us(sk);
 		}
 	} else {
 		/* no previous measure. */
 		srtt = m << 3;		/* take the measured time to be rtt */
-		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
-		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
+		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
+		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
+		tp->mdev_max_us = tp->rttvar_us;
 		tp->rtt_seq = tp->snd_nxt;
 	}
-	tp->srtt = max(1U, srtt);
+	tp->srtt_us = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	u64 rate;
 
 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
-	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt and scheduling constraints.
-	 * For small rtt, consider noise is too high, and use
-	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
-	 *
-	 * We probably need usec resolution in the future.
-	 * Note: This also takes care of possible srtt=0 case,
-	 * when tcp_rtt_estimator() was not yet called.
-	 */
-	if (tp->srtt > 8 + 2)
-		do_div(rate, tp->srtt);
+	if (likely(tp->srtt_us))
+		do_div(rate, tp->srtt_us);
 
 	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
 	 * without any lock. We want to make sure compiler wont store
@@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 }
 
 struct tcp_sacktag_state {
-	int reord;
-	int fack_count;
-	int flag;
-	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
+	int	reord;
+	int	fack_count;
+	long	rtt_us; /* RTT measured by SACKing never-retransmitted data */
+	int	flag;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1186,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  int dup_sack, int pcount, u32 xmit_time)
+			  int dup_sack, int pcount,
+			  const struct skb_mstamp *xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 				if (!after(end_seq, tp->high_seq))
 					state->flag |= FLAG_ORIG_SACK_ACKED;
 				/* Pick the earliest sequence sacked for RTT */
-				if (state->rtt < 0)
-					state->rtt = tcp_time_stamp - xmit_time;
+				if (state->rtt_us < 0) {
+					struct skb_mstamp now;
+
+					skb_mstamp_get(&now);
+					state->rtt_us = skb_mstamp_us_delta(&now,
+								xmit_time);
+				}
 			}
 
 			if (sacked & TCPCB_LOST) {
@@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			TCP_SKB_CB(skb)->when);
+			&skb->skb_mstamp);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						TCP_SKB_CB(skb)->when);
+						&skb->skb_mstamp);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una, s32 *sack_rtt)
+			u32 prior_snd_una, long *sack_rtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
-	state.rtt = -1;
+	state.rtt_us = -1L;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1824,7 +1823,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	*sack_rtt = state.rtt;
+	*sack_rtt_us = state.rtt_us;
 	return state.flag;
 }
 
@@ -2034,10 +2033,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 	 * available, or RTO is scheduled to fire first.
 	 */
 	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-	    (flag & FLAG_ECE) || !tp->srtt)
+	    (flag & FLAG_ECE) || !tp->srtt_us)
 		return false;
 
-	delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
+	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
+		    msecs_to_jiffies(2));
+
 	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
 		return false;
 
@@ -2884,7 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 }
 
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      s32 seq_rtt, s32 sack_rtt)
+				      long seq_rtt_us, long sack_rtt_us)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2894,10 +2895,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * is acked (RFC6298).
 	 */
 	if (flag & FLAG_RETRANS_DATA_ACKED)
-		seq_rtt = -1;
+		seq_rtt_us = -1L;
 
-	if (seq_rtt < 0)
-		seq_rtt = sack_rtt;
+	if (seq_rtt_us < 0)
+		seq_rtt_us = sack_rtt_us;
 
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
@@ -2905,14 +2906,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED)
-		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
 
-	if (seq_rtt < 0)
+	if (seq_rtt_us < 0)
 		return false;
 
-	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_rtt_estimator(sk, seq_rtt_us);
 	tcp_set_rto(sk);
 
 	/* RFC6298: only reset backoff on valid RTT measurement. */
@@ -2924,16 +2925,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	s32 seq_rtt = -1;
+	long seq_rtt_us = -1L;
 
 	if (synack_stamp && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - synack_stamp;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
 
 	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
 	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
 	 */
-	if (!tp->srtt)
-		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (!tp->srtt_us)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
@@ -3022,26 +3023,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, s32 sack_rtt)
+			       u32 prior_snd_una, long sack_rtt_us)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	u32 now = tcp_time_stamp;
+	struct skb_mstamp first_ackt, last_ackt, now;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 prior_sacked = tp->sacked_out;
+	u32 reord = tp->packets_out;
 	bool fully_acked = true;
-	int flag = 0;
+	long ca_seq_rtt_us = -1L;
+	long seq_rtt_us = -1L;
+	struct sk_buff *skb;
 	u32 pkts_acked = 0;
-	u32 reord = tp->packets_out;
-	u32 prior_sacked = tp->sacked_out;
-	s32 seq_rtt = -1;
-	s32 ca_seq_rtt = -1;
-	ktime_t last_ackt = net_invalid_timestamp();
 	bool rtt_update;
+	int flag = 0;
+
+	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-		u32 acked_pcount;
 		u8 sacked = scb->sacked;
+		u32 acked_pcount;
 
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
@@ -3063,11 +3065,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else {
-			ca_seq_rtt = now - scb->when;
-			last_ackt = skb->tstamp;
-			if (seq_rtt < 0) {
-				seq_rtt = ca_seq_rtt;
-			}
+			last_ackt = skb->skb_mstamp;
+			if (!first_ackt.v64)
+				first_ackt = last_ackt;
+
 			if (!(sacked & TCPCB_SACKED_ACKED))
 				reord = min(pkts_acked, reord);
 			if (!after(scb->end_seq, tp->high_seq))
@@ -3113,7 +3114,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+	skb_mstamp_get(&now);
+	if (first_ackt.v64) {
+		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
+		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+	}
+
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
@@ -3141,25 +3148,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-		if (ca_ops->pkts_acked) {
-			s32 rtt_us = -1;
-
-			/* Is the ACK triggering packet unambiguous? */
-			if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
-				/* High resolution needed and available? */
-				if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
-				    !ktime_equal(last_ackt,
-						 net_invalid_timestamp()))
-					rtt_us = ktime_us_delta(ktime_get_real(),
-								last_ackt);
-				else if (ca_seq_rtt >= 0)
-					rtt_us = jiffies_to_usecs(ca_seq_rtt);
-			}
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
 
-			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-		}
-	} else if (skb && rtt_update && sack_rtt >= 0 &&
-		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
+		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3369,12 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
+	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
-	s32 sack_rtt = -1;
+	long sack_rtt_us = -1L;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3432,7 +3425,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-							&sack_rtt);
+							&sack_rtt_us);
 
 		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
@@ -3451,7 +3444,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+				    sack_rtt_us);
 	acked -= tp->packets_out;
 
 	/* Advance cwnd if state allows */
@@ -3474,8 +3468,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
-	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
-		tcp_update_pacing_rate(sk);
+	tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
@@ -3504,7 +3497,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt);
+						&sack_rtt_us);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3cf976510497..17c0fb172fba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 			break;
 
 		icsk->icsk_backoff--;
-		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 		tcp_bound_rto(sk);
 
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 503798f2fcd6..c9aecae31327 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -315,7 +315,6 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 }
 
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
-	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
 	struct	tcp_fastopen_cookie	cookie;
 };
 
+/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
+ * Kernel only stores RTT and RTTVAR in usec resolution
+ */
+#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
+
 struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
 	struct inetpeer_addr		tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
 	u32				tcpm_ts;
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
-	u32				tcpm_vals[TCP_METRIC_MAX + 1];
+	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
 
 	struct rcu_head			rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
 	return tm->tcpm_vals[idx];
 }
 
-static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
-				  enum tcp_metric_index idx)
-{
-	return msecs_to_jiffies(tm->tcpm_vals[idx]);
-}
-
 static void tcp_metric_set(struct tcp_metrics_block *tm,
 			   enum tcp_metric_index idx,
 			   u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
 	tm->tcpm_vals[idx] = val;
 }
 
-static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
-				 enum tcp_metric_index idx,
-				 u32 val)
-{
-	tm->tcpm_vals[idx] = jiffies_to_msecs(val);
-}
-
 static bool addr_same(const struct inetpeer_addr *a,
 		      const struct inetpeer_addr *b)
 {
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
 
 static DEFINE_SPINLOCK(tcp_metrics_lock);
 
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+			  const struct dst_entry *dst,
 			  bool fastopen_clear)
 {
+	u32 msval;
 	u32 val;
 
 	tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
 		val |= 1 << TCP_METRIC_REORDERING;
 	tm->tcpm_lock = val;
 
-	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
-	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
+	msval = dst_metric_raw(dst, RTAX_RTT);
+	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+
+	msval = dst_metric_raw(dst, RTAX_RTTVAR);
+	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
 	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
 	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
 		dst_confirm(dst);
 
 	rcu_read_lock();
-	if (icsk->icsk_backoff || !tp->srtt) {
+	if (icsk->icsk_backoff || !tp->srtt_us) {
 		/* This session failed to estimate rtt. Why?
 		 * Probably, no packets returned in time.  Reset our
 		 * results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
 	if (!tm)
 		goto out_unlock;
 
-	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
-	m = rtt - tp->srtt;
+	rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
+	m = rtt - tp->srtt_us;
 
 	/* If newly calculated rtt larger than stored one, store new
 	 * one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
 	 */
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
 		if (m <= 0)
-			rtt = tp->srtt;
+			rtt = tp->srtt_us;
 		else
 			rtt -= (m >> 3);
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
+		tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
 	}
 
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
 
 		/* Scale deviation to rttvar fixed point */
 		m >>= 1;
-		if (m < tp->mdev)
-			m = tp->mdev;
+		if (m < tp->mdev_us)
+			m = tp->mdev_us;
 
-		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
+		var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
 		if (m >= var)
 			var = m;
 		else
 			var -= (var - m) >> 2;
 
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
+		tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
 	}
 
 	if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
 		tp->reordering = val;
 	}
 
-	crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+	crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
 	rcu_read_unlock();
 reset:
 	/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	if (crtt > tp->srtt) {
+	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt >>= 3;
+		crtt /= 8 * USEC_PER_MSEC;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
-	} else if (tp->srtt == 0) {
+	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
 		 * 3WHS. This is most likely due to retransmission,
 		 * including spurious one. Reset the RTO back to 3secs
 		 * from the more aggressive 1sec to avoid more spurious
 		 * retransmission.
 		 */
-		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
+		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
 	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
 		if (!nest)
 			goto nla_put_failure;
-		for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
-			if (!tm->tcpm_vals[i])
+		for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+			u32 val = tm->tcpm_vals[i];
+
+			if (!val)
 				continue;
-			if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+			if (i == TCP_METRIC_RTT) {
+				if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
+						val) < 0)
+					goto nla_put_failure;
+				n++;
+				val = max(val / 1000, 1U);
+			}
+			if (i == TCP_METRIC_RTTVAR) {
+				if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
+						val) < 0)
+					goto nla_put_failure;
+				n++;
+				val = max(val / 1000, 1U);
+			}
+			if (nla_put_u32(msg, i + 1, val) < 0)
 				goto nla_put_failure;
 			n++;
 		}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a436c517e44..ca788ada5bd3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		tcp_init_wl(newtp, treq->rcv_isn);
 
-		newtp->srtt = 0;
-		newtp->mdev = TCP_TIMEOUT_INIT;
+		newtp->srtt_us = 0;
+		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c5eadec001c1..bf38b1fb63ab 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -866,11 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
-		/* If congestion control is doing timestamping, we must
-		 * take such a timestamp before we potentially clone/copy.
-		 */
-		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-			__net_timestamp(skb);
+		skb_mstamp_get(&skb->skb_mstamp);
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
@@ -1974,7 +1970,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = tp->srtt >> 3;
+	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
 		return false;
@@ -1996,7 +1992,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -3050,8 +3046,9 @@ void tcp_send_delayed_ack(struct sock *sk)
 		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
 		 * directly.
 		 */
-		if (tp->srtt) {
-			int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+		if (tp->srtt_us) {
+			int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+					TCP_DELACK_MIN);
 
 			if (rtt < max_ato)
 				max_ato = rtt;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 1f2d37613c9e..3b66610d4156 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			p->snd_wnd = tp->snd_wnd;
 			p->rcv_wnd = tp->rcv_wnd;
 			p->ssthresh = tcp_current_ssthresh(sk);
-			p->srtt = tp->srtt >> 3;
+			p->srtt = tp->srtt_us >> 3;
 
 			tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
 		}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a022c17c9cf1..48539fff6357 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -306,7 +306,6 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a94865..1b8e28fcd7e1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 8eab02030ed0..5ede0e727945 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -227,7 +227,6 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,
-- 
cgit v1.2.3


From dc64d39b54c1e9db97a6fb1ca52598c981728157 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Date: Tue, 25 Feb 2014 11:40:17 +0100
Subject: spi: spidev: Add support for Dual/Quad SPI Transfers

Add support for Dual/Quad SPI Transfers to the spidev API.
As this uses SPI mode bits that don't fit in a single byte, two new
ioctls (SPI_IOC_RD_MODE32 and SPI_IOC_WR_MODE32) are introduced.

Signed-off-by: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 Documentation/spi/spidev        |  6 ++++++
 drivers/spi/spidev.c            | 19 +++++++++++++++----
 include/uapi/linux/spi/spidev.h | 14 ++++++++++++--
 3 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev
index ed2da5e5b28a..3d14035b1766 100644
--- a/Documentation/spi/spidev
+++ b/Documentation/spi/spidev
@@ -85,6 +85,12 @@ settings for data transfer parameters:
 	SPI_MODE_0..SPI_MODE_3; or if you prefer you can combine SPI_CPOL
 	(clock polarity, idle high iff this is set) or SPI_CPHA (clock phase,
 	sample on trailing edge iff this is set) flags.
+	Note that this request is limited to SPI mode flags that fit in a
+	single byte.
+
+    SPI_IOC_RD_MODE32, SPI_IOC_WR_MODE32 ... pass a pointer to a uint32_t
+	which will return (RD) or assign (WR) the full SPI transfer mode,
+	not limited to the bits that fit in one byte.
 
     SPI_IOC_RD_LSB_FIRST, SPI_IOC_WR_LSB_FIRST ... pass a pointer to a byte
 	which will return (RD) or assign (WR) the bit justification used to
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2abc0f5a82be..e3bc23bb5883 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -73,7 +73,8 @@ static DECLARE_BITMAP(minors, N_SPI_MINORS);
  */
 #define SPI_MODE_MASK		(SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \
 				| SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \
-				| SPI_NO_CS | SPI_READY)
+				| SPI_NO_CS | SPI_READY | SPI_TX_DUAL \
+				| SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)
 
 struct spidev_data {
 	dev_t			devt;
@@ -265,6 +266,8 @@ static int spidev_message(struct spidev_data *spidev,
 		buf += k_tmp->len;
 
 		k_tmp->cs_change = !!u_tmp->cs_change;
+		k_tmp->tx_nbits = u_tmp->tx_nbits;
+		k_tmp->rx_nbits = u_tmp->rx_nbits;
 		k_tmp->bits_per_word = u_tmp->bits_per_word;
 		k_tmp->delay_usecs = u_tmp->delay_usecs;
 		k_tmp->speed_hz = u_tmp->speed_hz;
@@ -359,6 +362,10 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		retval = __put_user(spi->mode & SPI_MODE_MASK,
 					(__u8 __user *)arg);
 		break;
+	case SPI_IOC_RD_MODE32:
+		retval = __put_user(spi->mode & SPI_MODE_MASK,
+					(__u32 __user *)arg);
+		break;
 	case SPI_IOC_RD_LSB_FIRST:
 		retval = __put_user((spi->mode & SPI_LSB_FIRST) ?  1 : 0,
 					(__u8 __user *)arg);
@@ -372,7 +379,11 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	/* write requests */
 	case SPI_IOC_WR_MODE:
-		retval = __get_user(tmp, (u8 __user *)arg);
+	case SPI_IOC_WR_MODE32:
+		if (cmd == SPI_IOC_WR_MODE)
+			retval = __get_user(tmp, (u8 __user *)arg);
+		else
+			retval = __get_user(tmp, (u32 __user *)arg);
 		if (retval == 0) {
 			u32	save = spi->mode;
 
@@ -382,12 +393,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			}
 
 			tmp |= spi->mode & ~SPI_MODE_MASK;
-			spi->mode = (u8)tmp;
+			spi->mode = (u16)tmp;
 			retval = spi_setup(spi);
 			if (retval < 0)
 				spi->mode = save;
 			else
-				dev_dbg(&spi->dev, "spi mode %02x\n", tmp);
+				dev_dbg(&spi->dev, "spi mode %x\n", tmp);
 		}
 		break;
 	case SPI_IOC_WR_LSB_FIRST:
diff --git a/include/uapi/linux/spi/spidev.h b/include/uapi/linux/spi/spidev.h
index 52d9ed01855f..dd5f21e75805 100644
--- a/include/uapi/linux/spi/spidev.h
+++ b/include/uapi/linux/spi/spidev.h
@@ -42,6 +42,10 @@
 #define SPI_LOOP		0x20
 #define SPI_NO_CS		0x40
 #define SPI_READY		0x80
+#define SPI_TX_DUAL		0x100
+#define SPI_TX_QUAD		0x200
+#define SPI_RX_DUAL		0x400
+#define SPI_RX_QUAD		0x800
 
 /*---------------------------------------------------------------------------*/
 
@@ -92,7 +96,9 @@ struct spi_ioc_transfer {
 	__u16		delay_usecs;
 	__u8		bits_per_word;
 	__u8		cs_change;
-	__u32		pad;
+	__u8		tx_nbits;
+	__u8		rx_nbits;
+	__u16		pad;
 
 	/* If the contents of 'struct spi_ioc_transfer' ever change
 	 * incompatibly, then the ioctl number (currently 0) must change;
@@ -110,7 +116,7 @@ struct spi_ioc_transfer {
 #define SPI_IOC_MESSAGE(N) _IOW(SPI_IOC_MAGIC, 0, char[SPI_MSGSIZE(N)])
 
 
-/* Read / Write of SPI mode (SPI_MODE_0..SPI_MODE_3) */
+/* Read / Write of SPI mode (SPI_MODE_0..SPI_MODE_3) (limited to 8 bits) */
 #define SPI_IOC_RD_MODE			_IOR(SPI_IOC_MAGIC, 1, __u8)
 #define SPI_IOC_WR_MODE			_IOW(SPI_IOC_MAGIC, 1, __u8)
 
@@ -126,6 +132,10 @@ struct spi_ioc_transfer {
 #define SPI_IOC_RD_MAX_SPEED_HZ		_IOR(SPI_IOC_MAGIC, 4, __u32)
 #define SPI_IOC_WR_MAX_SPEED_HZ		_IOW(SPI_IOC_MAGIC, 4, __u32)
 
+/* Read / Write of the SPI mode field */
+#define SPI_IOC_RD_MODE32		_IOR(SPI_IOC_MAGIC, 5, __u32)
+#define SPI_IOC_WR_MODE32		_IOW(SPI_IOC_MAGIC, 5, __u32)
+
 
 
 #endif /* SPIDEV_H */
-- 
cgit v1.2.3


From 0768b3b3d228c5acf2075f40f3d25cda30011d4f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 19 Feb 2014 17:27:06 +0100
Subject: netfilter: nf_tables: add optional user data area to rules

This allows us to store user comment strings, but it could also be
used to store any kind of information that the user application needs
to link to the rule.

Scratch 8 bits for the new ulen field that indicates the length of the
user data area. 4 bits from the handle (so it's 42 bits long, according
to Patrick, it would last 139 years with 1000 new rules per second)
and 4 bits from dlen (so the expression data area is 4K, which seems
sufficient by now even considering the compatibility layer).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_tables.h        | 11 +++++++++--
 include/uapi/linux/netfilter/nf_tables.h |  5 ++++-
 net/netfilter/nf_tables_api.c            | 17 +++++++++++++++--
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 81abd61500f4..5af56da6d6c6 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -326,13 +326,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	@handle: rule handle
  *	@genmask: generation mask
  *	@dlen: length of expression data
+ *	@ulen: length of user data (used for comments)
  *	@data: expression data
  */
 struct nft_rule {
 	struct list_head		list;
-	u64				handle:46,
+	u64				handle:42,
 					genmask:2,
-					dlen:16;
+					dlen:12,
+					ulen:8;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
@@ -371,6 +373,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
 	return (struct nft_expr *)&rule->data[rule->dlen];
 }
 
+static inline void *nft_userdata(const struct nft_rule *rule)
+{
+	return (void *)&rule->data[rule->dlen];
+}
+
 /*
  * The last pointer isn't really necessary, but the compiler isn't able to
  * determine that the result of nft_expr_last() is always the same since it
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c84c452c62a7..c88ccbfda5f1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,7 +1,8 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
-#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_CHAIN_MAXNAMELEN	32
+#define NFT_USERDATA_MAXLEN	256
 
 enum nft_registers {
 	NFT_REG_VERDICT,
@@ -156,6 +157,7 @@ enum nft_chain_attributes {
  * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
  * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
  * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
+ * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
  */
 enum nft_rule_attributes {
 	NFTA_RULE_UNSPEC,
@@ -165,6 +167,7 @@ enum nft_rule_attributes {
 	NFTA_RULE_EXPRESSIONS,
 	NFTA_RULE_COMPAT,
 	NFTA_RULE_POSITION,
+	NFTA_RULE_USERDATA,
 	__NFTA_RULE_MAX
 };
 #define NFTA_RULE_MAX		(__NFTA_RULE_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0b5634094cb0..f25d0110fe95 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1295,6 +1295,8 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
 	[NFTA_RULE_COMPAT]	= { .type = NLA_NESTED },
 	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
+	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
+				    .len = NFT_USERDATA_MAXLEN },
 };
 
 static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
@@ -1347,6 +1349,10 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
 	}
 	nla_nest_end(skb, list);
 
+	if (rule->ulen &&
+	    nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
+		goto nla_put_failure;
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -1583,7 +1589,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
-	unsigned int size, i, n;
+	unsigned int size, i, n, ulen = 0;
 	int err, rem;
 	bool create;
 	u64 handle, pos_handle;
@@ -1649,8 +1655,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 		}
 	}
 
+	if (nla[NFTA_RULE_USERDATA])
+		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+
 	err = -ENOMEM;
-	rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
 	if (rule == NULL)
 		goto err1;
 
@@ -1658,6 +1667,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 	rule->handle = handle;
 	rule->dlen   = size;
+	rule->ulen   = ulen;
+
+	if (ulen)
+		nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
 
 	expr = nft_expr_first(rule);
 	for (i = 0; i < n; i++) {
-- 
cgit v1.2.3


From 589f5816f3f660aa4b1c93fc9eac58a1e807bbbb Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Tue, 25 Feb 2014 17:15:12 -0800
Subject: net: kdoc struct net_device flags and priv_flags

We have documentation for these flags but they're scattered
all over the place. #defines don't allow documentation to be
written easily, so to start bringing the documentation together,
use the enum kdoc practice but keep the defines so that
userspace is still able to #ifdef them.

I've verified the same values are assigned before and after
with a simple userspace test program [0] and checksumming the
output.

[0] http://drvbp1.linux-foundation.org/~mcgrof/kdoc/netdev_flags/

mcgrof@gnat ~/tmp $ ./check-flags | sha1sum
0ec5b6b1840aa3bb9ce464e61c564820871c92c3  -

Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if.h | 213 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 161 insertions(+), 52 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index d758163b0e43..42ec87994cf6 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -27,64 +27,173 @@
 #define	IFALIASZ	256
 #include <linux/hdlc/ioctl.h>
 
-/* Standard interface flags (netdevice->flags). */
-#define	IFF_UP		0x1		/* interface is up		*/
-#define	IFF_BROADCAST	0x2		/* broadcast address valid	*/
-#define	IFF_DEBUG	0x4		/* turn on debugging		*/
-#define	IFF_LOOPBACK	0x8		/* is a loopback net		*/
-#define	IFF_POINTOPOINT	0x10		/* interface is has p-p link	*/
-#define	IFF_NOTRAILERS	0x20		/* avoid use of trailers	*/
-#define	IFF_RUNNING	0x40		/* interface RFC2863 OPER_UP	*/
-#define	IFF_NOARP	0x80		/* no ARP protocol		*/
-#define	IFF_PROMISC	0x100		/* receive all packets		*/
-#define	IFF_ALLMULTI	0x200		/* receive all multicast packets*/
-
-#define IFF_MASTER	0x400		/* master of a load balancer 	*/
-#define IFF_SLAVE	0x800		/* slave of a load balancer	*/
-
-#define IFF_MULTICAST	0x1000		/* Supports multicast		*/
-
-#define IFF_PORTSEL	0x2000          /* can set media type		*/
-#define IFF_AUTOMEDIA	0x4000		/* auto media select active	*/
-#define IFF_DYNAMIC	0x8000		/* dialup device with changing addresses*/
-
-#define IFF_LOWER_UP	0x10000		/* driver signals L1 up		*/
-#define IFF_DORMANT	0x20000		/* driver signals dormant	*/
+/**
+ * enum net_device_flags - &struct net_device flags
+ *
+ * These are the &struct net_device flags, they can be set by drivers, the
+ * kernel and some can be triggered by userspace. Userspace can query and
+ * set these flags using userspace utilities but there is also a sysfs
+ * entry available for all dev flags which can be queried and set. These flags
+ * are shared for all types of net_devices. The sysfs entries are available
+ * via /sys/class/net/<dev>/flags. Flags which can be toggled through sysfs
+ * are annotated below, note that only a few flags can be toggled and some
+ * other flags are always preserved from the original net_device flags
+ * even if you try to set them via sysfs. Flags which are always preserved
+ * are kept under the flag grouping @IFF_VOLATILE. Flags which are volatile
+ * are annotated below as such.
+ *
+ * You should have a pretty good reason to be extending these flags.
+ *
+ * @IFF_UP: interface is up. Can be toggled through sysfs.
+ * @IFF_BROADCAST: broadcast address valid. Volatile.
+ * @IFF_DEBUG: turn on debugging. Can be toggled through sysfs.
+ * @IFF_LOOPBACK: is a loopback net. Volatile.
+ * @IFF_POINTOPOINT: interface has p-p link. Volatile.
+ * @IFF_NOTRAILERS: avoid use of trailers. Can be toggled through sysfs.
+ *	Volatile.
+ * @IFF_RUNNING: interface RFC2863 OPER_UP. Volatile.
+ * @IFF_NOARP: no ARP protocol. Can be toggled through sysfs. Volatile.
+ * @IFF_PROMISC: receive all packets. Can be toggled through sysfs.
+ * @IFF_ALLMULTI: receive all multicast packets. Can be toggled through
+ *	sysfs.
+ * @IFF_MASTER: master of a load balancer. Volatile.
+ * @IFF_SLAVE: slave of a load balancer. Volatile.
+ * @IFF_MULTICAST: Supports multicast. Can be toggled through sysfs.
+ * @IFF_PORTSEL: can set media type. Can be toggled through sysfs.
+ * @IFF_AUTOMEDIA: auto media select active. Can be toggled through sysfs.
+ * @IFF_DYNAMIC: dialup device with changing addresses. Can be toggled
+ *	through sysfs.
+ * @IFF_LOWER_UP: driver signals L1 up. Volatile.
+ * @IFF_DORMANT: driver signals dormant. Volatile.
+ * @IFF_ECHO: echo sent packets. Volatile.
+ */
+enum net_device_flags {
+	IFF_UP				= 1<<0,  /* sysfs */
+	IFF_BROADCAST			= 1<<1,  /* volatile */
+	IFF_DEBUG			= 1<<2,  /* sysfs */
+	IFF_LOOPBACK			= 1<<3,  /* volatile */
+	IFF_POINTOPOINT			= 1<<4,  /* volatile */
+	IFF_NOTRAILERS			= 1<<5,  /* sysfs */
+	IFF_RUNNING			= 1<<6,  /* volatile */
+	IFF_NOARP			= 1<<7,  /* sysfs */
+	IFF_PROMISC			= 1<<8,  /* sysfs */
+	IFF_ALLMULTI			= 1<<9,  /* sysfs */
+	IFF_MASTER			= 1<<10, /* volatile */
+	IFF_SLAVE			= 1<<11, /* volatile */
+	IFF_MULTICAST			= 1<<12, /* sysfs */
+	IFF_PORTSEL			= 1<<13, /* sysfs */
+	IFF_AUTOMEDIA			= 1<<14, /* sysfs */
+	IFF_DYNAMIC			= 1<<15, /* sysfs */
+	IFF_LOWER_UP			= 1<<16, /* volatile */
+	IFF_DORMANT			= 1<<17, /* volatile */
+	IFF_ECHO			= 1<<18, /* volatile */
+};
 
-#define IFF_ECHO	0x40000		/* echo sent packets		*/
+#define IFF_UP				IFF_UP
+#define IFF_BROADCAST			IFF_BROADCAST
+#define IFF_DEBUG			IFF_DEBUG
+#define IFF_LOOPBACK			IFF_LOOPBACK
+#define IFF_POINTOPOINT			IFF_POINTOPOINT
+#define IFF_NOTRAILERS			IFF_NOTRAILERS
+#define IFF_RUNNING			IFF_RUNNING
+#define IFF_NOARP			IFF_NOARP
+#define IFF_PROMISC			IFF_PROMISC
+#define IFF_ALLMULTI			IFF_ALLMULTI
+#define IFF_MASTER			IFF_MASTER
+#define IFF_SLAVE			IFF_SLAVE
+#define IFF_MULTICAST			IFF_MULTICAST
+#define IFF_PORTSEL			IFF_PORTSEL
+#define IFF_AUTOMEDIA			IFF_AUTOMEDIA
+#define IFF_DYNAMIC			IFF_DYNAMIC
+#define IFF_LOWER_UP			IFF_LOWER_UP
+#define IFF_DORMANT			IFF_DORMANT
+#define IFF_ECHO			IFF_ECHO
 
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
 
-/* Private (from user) interface flags (netdevice->priv_flags). */
-#define IFF_802_1Q_VLAN 0x1             /* 802.1Q VLAN device.          */
-#define IFF_EBRIDGE	0x2		/* Ethernet bridging device.	*/
-#define IFF_SLAVE_INACTIVE	0x4	/* bonding slave not the curr. active */
-#define IFF_MASTER_8023AD	0x8	/* bonding master, 802.3ad. 	*/
-#define IFF_MASTER_ALB	0x10		/* bonding master, balance-alb.	*/
-#define IFF_BONDING	0x20		/* bonding master or slave	*/
-#define IFF_SLAVE_NEEDARP 0x40		/* need ARPs for validation	*/
-#define IFF_ISATAP	0x80		/* ISATAP interface (RFC4214)	*/
-#define IFF_MASTER_ARPMON 0x100		/* bonding master, ARP mon in use */
-#define IFF_WAN_HDLC	0x200		/* WAN HDLC device		*/
-#define IFF_XMIT_DST_RELEASE 0x400	/* dev_hard_start_xmit() is allowed to
-					 * release skb->dst
-					 */
-#define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
-#define IFF_DISABLE_NETPOLL	0x1000	/* disable netpoll at run-time */
-#define IFF_MACVLAN_PORT	0x2000	/* device used as macvlan port */
-#define IFF_BRIDGE_PORT	0x4000		/* device used as bridge port */
-#define IFF_OVS_DATAPATH	0x8000	/* device used as Open vSwitch
-					 * datapath port */
-#define IFF_TX_SKB_SHARING	0x10000	/* The interface supports sharing
-					 * skbs on transmit */
-#define IFF_UNICAST_FLT	0x20000		/* Supports unicast filtering	*/
-#define IFF_TEAM_PORT	0x40000		/* device used as team port */
-#define IFF_SUPP_NOFCS	0x80000		/* device supports sending custom FCS */
-#define IFF_LIVE_ADDR_CHANGE 0x100000	/* device supports hardware address
-					 * change when it's running */
-#define IFF_MACVLAN 0x200000		/* Macvlan device */
+/**
+ * enum net_device_priv_flags - &struct net_device priv_flags
+ *
+ * These are the &struct net_device, they are only set internally
+ * by drivers and used in the kernel. These flags are invisible to
+ * userspace, this means that the order of these flags can change
+ * during any kernel release.
+ *
+ * You should have a pretty good reason to be extending these flags.
+ *
+ * @IFF_802_1Q_VLAN: 802.1Q VLAN device
+ * @IFF_EBRIDGE: Ethernet bridging device
+ * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active
+ * @IFF_MASTER_8023AD: bonding master, 802.3ad
+ * @IFF_MASTER_ALB: bonding master, balance-alb
+ * @IFF_BONDING: bonding master or slave
+ * @IFF_SLAVE_NEEDARP: need ARPs for validation
+ * @IFF_ISATAP: ISATAP interface (RFC4214)
+ * @IFF_MASTER_ARPMON: bonding master, ARP mon in use
+ * @IFF_WAN_HDLC: WAN HDLC device
+ * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to
+ *	release skb->dst
+ * @IFF_DONT_BRIDGE: disallow bridging this ether dev
+ * @IFF_DISABLE_NETPOLL: disable netpoll at run-time
+ * @IFF_MACVLAN_PORT: device used as macvlan port
+ * @IFF_BRIDGE_PORT: device used as bridge port
+ * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port
+ * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit
+ * @IFF_UNICAST_FLT: Supports unicast filtering
+ * @IFF_TEAM_PORT: device used as team port
+ * @IFF_SUPP_NOFCS: device supports sending custom FCS
+ * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
+ *	change when it's running
+ * @IFF_MACVLAN: Macvlan device
+ */
+enum netdev_priv_flags {
+	IFF_802_1Q_VLAN			= 1<<0,
+	IFF_EBRIDGE			= 1<<1,
+	IFF_SLAVE_INACTIVE		= 1<<2,
+	IFF_MASTER_8023AD		= 1<<3,
+	IFF_MASTER_ALB			= 1<<4,
+	IFF_BONDING			= 1<<5,
+	IFF_SLAVE_NEEDARP		= 1<<6,
+	IFF_ISATAP			= 1<<7,
+	IFF_MASTER_ARPMON		= 1<<8,
+	IFF_WAN_HDLC			= 1<<9,
+	IFF_XMIT_DST_RELEASE		= 1<<10,
+	IFF_DONT_BRIDGE			= 1<<11,
+	IFF_DISABLE_NETPOLL		= 1<<12,
+	IFF_MACVLAN_PORT		= 1<<13,
+	IFF_BRIDGE_PORT			= 1<<14,
+	IFF_OVS_DATAPATH		= 1<<15,
+	IFF_TX_SKB_SHARING		= 1<<16,
+	IFF_UNICAST_FLT			= 1<<17,
+	IFF_TEAM_PORT			= 1<<18,
+	IFF_SUPP_NOFCS			= 1<<19,
+	IFF_LIVE_ADDR_CHANGE		= 1<<20,
+	IFF_MACVLAN			= 1<<21,
+};
 
+#define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
+#define IFF_EBRIDGE			IFF_EBRIDGE
+#define IFF_SLAVE_INACTIVE		IFF_SLAVE_INACTIVE
+#define IFF_MASTER_8023AD		IFF_MASTER_8023AD
+#define IFF_MASTER_ALB			IFF_MASTER_ALB
+#define IFF_BONDING			IFF_BONDING
+#define IFF_SLAVE_NEEDARP		IFF_SLAVE_NEEDARP
+#define IFF_ISATAP			IFF_ISATAP
+#define IFF_MASTER_ARPMON		IFF_MASTER_ARPMON
+#define IFF_WAN_HDLC			IFF_WAN_HDLC
+#define IFF_XMIT_DST_RELEASE		IFF_XMIT_DST_RELEASE
+#define IFF_DONT_BRIDGE			IFF_DONT_BRIDGE
+#define IFF_DISABLE_NETPOLL		IFF_DISABLE_NETPOLL
+#define IFF_MACVLAN_PORT		IFF_MACVLAN_PORT
+#define IFF_BRIDGE_PORT			IFF_BRIDGE_PORT
+#define IFF_OVS_DATAPATH		IFF_OVS_DATAPATH
+#define IFF_TX_SKB_SHARING		IFF_TX_SKB_SHARING
+#define IFF_UNICAST_FLT			IFF_UNICAST_FLT
+#define IFF_TEAM_PORT			IFF_TEAM_PORT
+#define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
+#define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
+#define IFF_MACVLAN			IFF_MACVLAN
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
-- 
cgit v1.2.3


From 7aa98047df95d7caf0678e939cdd936dfb99cd06 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Tue, 25 Feb 2014 17:15:13 -0800
Subject: net: move net_device priv_flags out from UAPI

These flags are private to the kernel (not meant for userspace), and they're unstable
anyway and can be shuffled at will (see 080e4130b1fb)
so any userspace application relying on them is on crack.

Test compiled with allyesconfig.

mcgrof@drvbp1 /pub/mem/mcgrof/net-next (git::master)$ make allyesconfig
mcgrof@drvbp1 /pub/mem/mcgrof/net-next (git::master)$ time make -j 20
...
  BUILD   arch/x86/boot/bzImage
Setup is 16992 bytes (padded to 17408 bytes).
System is 56153 kB
CRC 721d2751
Kernel: arch/x86/boot/bzImage is ready  (#1)
real    19m35.744s
user    280m37.984s
sys     27m54.104s

Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/if.h   | 83 -----------------------------------------------
 2 files changed, 83 insertions(+), 83 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5e84483c0650..1a869488b8ae 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1147,6 +1147,89 @@ struct net_device_ops {
 							void *priv);
 };
 
+/**
+ * enum netdev_priv_flags - &struct net_device priv_flags
+ *
+ * These are the &struct net_device priv_flags. They are only set internally
+ * by drivers and used in the kernel. These flags are invisible to
+ * userspace, which means that the order of these flags can change
+ * during any kernel release.
+ *
+ * You should have a pretty good reason to be extending these flags.
+ *
+ * @IFF_802_1Q_VLAN: 802.1Q VLAN device
+ * @IFF_EBRIDGE: Ethernet bridging device
+ * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active
+ * @IFF_MASTER_8023AD: bonding master, 802.3ad
+ * @IFF_MASTER_ALB: bonding master, balance-alb
+ * @IFF_BONDING: bonding master or slave
+ * @IFF_SLAVE_NEEDARP: need ARPs for validation
+ * @IFF_ISATAP: ISATAP interface (RFC4214)
+ * @IFF_MASTER_ARPMON: bonding master, ARP mon in use
+ * @IFF_WAN_HDLC: WAN HDLC device
+ * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to
+ *	release skb->dst
+ * @IFF_DONT_BRIDGE: disallow bridging this ether dev
+ * @IFF_DISABLE_NETPOLL: disable netpoll at run-time
+ * @IFF_MACVLAN_PORT: device used as macvlan port
+ * @IFF_BRIDGE_PORT: device used as bridge port
+ * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port
+ * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit
+ * @IFF_UNICAST_FLT: Supports unicast filtering
+ * @IFF_TEAM_PORT: device used as team port
+ * @IFF_SUPP_NOFCS: device supports sending custom FCS
+ * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
+ *	change when it's running
+ * @IFF_MACVLAN: Macvlan device
+ */
+enum netdev_priv_flags {
+	IFF_802_1Q_VLAN			= 1<<0,
+	IFF_EBRIDGE			= 1<<1,
+	IFF_SLAVE_INACTIVE		= 1<<2,
+	IFF_MASTER_8023AD		= 1<<3,
+	IFF_MASTER_ALB			= 1<<4,
+	IFF_BONDING			= 1<<5,
+	IFF_SLAVE_NEEDARP		= 1<<6,
+	IFF_ISATAP			= 1<<7,
+	IFF_MASTER_ARPMON		= 1<<8,
+	IFF_WAN_HDLC			= 1<<9,
+	IFF_XMIT_DST_RELEASE		= 1<<10,
+	IFF_DONT_BRIDGE			= 1<<11,
+	IFF_DISABLE_NETPOLL		= 1<<12,
+	IFF_MACVLAN_PORT		= 1<<13,
+	IFF_BRIDGE_PORT			= 1<<14,
+	IFF_OVS_DATAPATH		= 1<<15,
+	IFF_TX_SKB_SHARING		= 1<<16,
+	IFF_UNICAST_FLT			= 1<<17,
+	IFF_TEAM_PORT			= 1<<18,
+	IFF_SUPP_NOFCS			= 1<<19,
+	IFF_LIVE_ADDR_CHANGE		= 1<<20,
+	IFF_MACVLAN			= 1<<21,
+};
+
+#define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
+#define IFF_EBRIDGE			IFF_EBRIDGE
+#define IFF_SLAVE_INACTIVE		IFF_SLAVE_INACTIVE
+#define IFF_MASTER_8023AD		IFF_MASTER_8023AD
+#define IFF_MASTER_ALB			IFF_MASTER_ALB
+#define IFF_BONDING			IFF_BONDING
+#define IFF_SLAVE_NEEDARP		IFF_SLAVE_NEEDARP
+#define IFF_ISATAP			IFF_ISATAP
+#define IFF_MASTER_ARPMON		IFF_MASTER_ARPMON
+#define IFF_WAN_HDLC			IFF_WAN_HDLC
+#define IFF_XMIT_DST_RELEASE		IFF_XMIT_DST_RELEASE
+#define IFF_DONT_BRIDGE			IFF_DONT_BRIDGE
+#define IFF_DISABLE_NETPOLL		IFF_DISABLE_NETPOLL
+#define IFF_MACVLAN_PORT		IFF_MACVLAN_PORT
+#define IFF_BRIDGE_PORT			IFF_BRIDGE_PORT
+#define IFF_OVS_DATAPATH		IFF_OVS_DATAPATH
+#define IFF_TX_SKB_SHARING		IFF_TX_SKB_SHARING
+#define IFF_UNICAST_FLT			IFF_UNICAST_FLT
+#define IFF_TEAM_PORT			IFF_TEAM_PORT
+#define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
+#define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
+#define IFF_MACVLAN			IFF_MACVLAN
+
 /*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index 42ec87994cf6..9cf2394f0bcf 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -112,89 +112,6 @@ enum net_device_flags {
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
 
-/**
- * enum net_device_priv_flags - &struct net_device priv_flags
- *
- * These are the &struct net_device, they are only set internally
- * by drivers and used in the kernel. These flags are invisible to
- * userspace, this means that the order of these flags can change
- * during any kernel release.
- *
- * You should have a pretty good reason to be extending these flags.
- *
- * @IFF_802_1Q_VLAN: 802.1Q VLAN device
- * @IFF_EBRIDGE: Ethernet bridging device
- * @IFF_SLAVE_INACTIVE: bonding slave not the curr. active
- * @IFF_MASTER_8023AD: bonding master, 802.3ad
- * @IFF_MASTER_ALB: bonding master, balance-alb
- * @IFF_BONDING: bonding master or slave
- * @IFF_SLAVE_NEEDARP: need ARPs for validation
- * @IFF_ISATAP: ISATAP interface (RFC4214)
- * @IFF_MASTER_ARPMON: bonding master, ARP mon in use
- * @IFF_WAN_HDLC: WAN HDLC device
- * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to
- *	release skb->dst
- * @IFF_DONT_BRIDGE: disallow bridging this ether dev
- * @IFF_DISABLE_NETPOLL: disable netpoll at run-time
- * @IFF_MACVLAN_PORT: device used as macvlan port
- * @IFF_BRIDGE_PORT: device used as bridge port
- * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port
- * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit
- * @IFF_UNICAST_FLT: Supports unicast filtering
- * @IFF_TEAM_PORT: device used as team port
- * @IFF_SUPP_NOFCS: device supports sending custom FCS
- * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
- *	change when it's running
- * @IFF_MACVLAN: Macvlan device
- */
-enum netdev_priv_flags {
-	IFF_802_1Q_VLAN			= 1<<0,
-	IFF_EBRIDGE			= 1<<1,
-	IFF_SLAVE_INACTIVE		= 1<<2,
-	IFF_MASTER_8023AD		= 1<<3,
-	IFF_MASTER_ALB			= 1<<4,
-	IFF_BONDING			= 1<<5,
-	IFF_SLAVE_NEEDARP		= 1<<6,
-	IFF_ISATAP			= 1<<7,
-	IFF_MASTER_ARPMON		= 1<<8,
-	IFF_WAN_HDLC			= 1<<9,
-	IFF_XMIT_DST_RELEASE		= 1<<10,
-	IFF_DONT_BRIDGE			= 1<<11,
-	IFF_DISABLE_NETPOLL		= 1<<12,
-	IFF_MACVLAN_PORT		= 1<<13,
-	IFF_BRIDGE_PORT			= 1<<14,
-	IFF_OVS_DATAPATH		= 1<<15,
-	IFF_TX_SKB_SHARING		= 1<<16,
-	IFF_UNICAST_FLT			= 1<<17,
-	IFF_TEAM_PORT			= 1<<18,
-	IFF_SUPP_NOFCS			= 1<<19,
-	IFF_LIVE_ADDR_CHANGE		= 1<<20,
-	IFF_MACVLAN			= 1<<21,
-};
-
-#define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
-#define IFF_EBRIDGE			IFF_EBRIDGE
-#define IFF_SLAVE_INACTIVE		IFF_SLAVE_INACTIVE
-#define IFF_MASTER_8023AD		IFF_MASTER_8023AD
-#define IFF_MASTER_ALB			IFF_MASTER_ALB
-#define IFF_BONDING			IFF_BONDING
-#define IFF_SLAVE_NEEDARP		IFF_SLAVE_NEEDARP
-#define IFF_ISATAP			IFF_ISATAP
-#define IFF_MASTER_ARPMON		IFF_MASTER_ARPMON
-#define IFF_WAN_HDLC			IFF_WAN_HDLC
-#define IFF_XMIT_DST_RELEASE		IFF_XMIT_DST_RELEASE
-#define IFF_DONT_BRIDGE			IFF_DONT_BRIDGE
-#define IFF_DISABLE_NETPOLL		IFF_DISABLE_NETPOLL
-#define IFF_MACVLAN_PORT		IFF_MACVLAN_PORT
-#define IFF_BRIDGE_PORT			IFF_BRIDGE_PORT
-#define IFF_OVS_DATAPATH		IFF_OVS_DATAPATH
-#define IFF_TX_SKB_SHARING		IFF_TX_SKB_SHARING
-#define IFF_UNICAST_FLT			IFF_UNICAST_FLT
-#define IFF_TEAM_PORT			IFF_TEAM_PORT
-#define IFF_SUPP_NOFCS			IFF_SUPP_NOFCS
-#define IFF_LIVE_ADDR_CHANGE		IFF_LIVE_ADDR_CHANGE
-#define IFF_MACVLAN			IFF_MACVLAN
-
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
 
-- 
cgit v1.2.3


From bf0bedd371574837caaeeaa01f674cdf90508275 Mon Sep 17 00:00:00 2001
From: Amit Grover <amit.grover@samsung.com>
Date: Tue, 4 Feb 2014 06:59:58 -0300
Subject: [media] v4l2: Add settings for Horizontal and Vertical MV Search
 Range

Adding V4L2 controls for horizontal and vertical search range in pixels
for motion estimation module in video encoder.

Signed-off-by: Swami Nathan <swaminath.p@samsung.com>
Signed-off-by: Amit Grover <amit.grover@samsung.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Kamil Debski <k.debski@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 20 ++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c         |  6 ++++++
 include/uapi/linux/v4l2-controls.h           |  2 ++
 3 files changed, 28 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index a5a3188e5af7..0e1770c133a8 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -2256,6 +2256,26 @@ Applicable to the MPEG1, MPEG2, MPEG4 encoders.</entry>
 		<entry>integer</entry>
 	      </row><row><entry spanname="descr">Sets the initial delay in milliseconds for
 VBV buffer control.</entry>
+	      </row>
+
+		  <row><entry></entry></row>
+	      <row id="v4l2-mpeg-video-hor-search-range">
+		<entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE</constant>&nbsp;</entry>
+		<entry>integer</entry>
+	      </row>
+		<row><entry spanname="descr">Horizontal search range defines maximum horizontal search area in pixels
+to search and match for the present Macroblock (MB) in the reference picture. This V4L2 control macro is used to set
+horizontal search range for motion estimation module in video encoder.</entry>
+	      </row>
+
+		 <row><entry></entry></row>
+	      <row id="v4l2-mpeg-video-vert-search-range">
+		<entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE</constant>&nbsp;</entry>
+		<entry>integer</entry>
+	      </row>
+		<row><entry spanname="descr">Vertical search range defines maximum vertical search area in pixels
+to search and match for the present Macroblock (MB) in the reference picture. This V4L2 control macro is used to set
+vertical search range for motion estimation module in video encoder.</entry>
 	      </row>
 
 	      <row><entry></entry></row>
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6ff002bd5909..e9e12c48c874 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -735,6 +735,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_DEC_PTS:			return "Video Decoder PTS";
 	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:			return "Video Decoder Frame Count";
 	case V4L2_CID_MPEG_VIDEO_VBV_DELAY:			return "Initial Delay for VBV Control";
+	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:		return "Horizontal MV Search Range";
+	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:		return "Vertical MV Search Range";
 	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:		return "Repeat Sequence Header";
 
 	/* VPX controls */
@@ -910,6 +912,10 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 		*min = 0;
 		*max = *step = 1;
 		break;
+	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:
+	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		break;
 	case V4L2_CID_PAN_RESET:
 	case V4L2_CID_TILT_RESET:
 	case V4L2_CID_FLASH_STROBE:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 2cbe605bbe04..cda6fa0262fc 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -376,6 +376,8 @@ enum v4l2_mpeg_video_multi_slice_mode {
 #define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_MPEG_BASE+224)
 #define V4L2_CID_MPEG_VIDEO_VBV_DELAY			(V4L2_CID_MPEG_BASE+225)
 #define V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER		(V4L2_CID_MPEG_BASE+226)
+#define V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE		(V4L2_CID_MPEG_BASE+227)
+#define V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE		(V4L2_CID_MPEG_BASE+228)
 
 #define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
 #define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
-- 
cgit v1.2.3


From f19c29e3e391a66a273e9afebaf01917245148cd Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 3 Mar 2014 12:31:36 -0800
Subject: tcp: snmp stats for Fast Open, SYN rtx, and data pkts

Add the following snmp stats:

TCPFastOpenActiveFail: Fast Open attempts (SYN/data) failed because
the remote does not accept it or the attempts timed out.

TCPSynRetrans: number of SYN and SYN/ACK retransmits to break down
retransmissions into SYN, fast-retransmits, timeout retransmits, etc.

TCPOrigDataSent: number of outgoing packets with original data (excluding
retransmission but including data-in-SYN). This counter is different from
TcpOutSegs because TcpOutSegs also tracks pure ACKs. TCPOrigDataSent is
more useful to track the TCP retransmission rate.

Change TCPFastOpenActive to track only successful Fast Opens to be symmetric to
TCPFastOpenPassive.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Lawrence Brakmo <brakmo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 3 +++
 net/ipv4/proc.c           | 3 +++
 net/ipv4/tcp_input.c      | 3 +++
 net/ipv4/tcp_ipv4.c       | 4 +++-
 net/ipv4/tcp_output.c     | 8 ++++++--
 net/ipv4/tcp_timer.c      | 3 +++
 net/ipv6/tcp_ipv6.c       | 4 +++-
 7 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 8d64a7e5d371..df40137f33dd 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -252,6 +252,7 @@ enum
 	LINUX_MIB_TCPCHALLENGEACK,		/* TCPChallengeACK */
 	LINUX_MIB_TCPSYNCHALLENGE,		/* TCPSYNChallenge */
 	LINUX_MIB_TCPFASTOPENACTIVE,		/* TCPFastOpenActive */
+	LINUX_MIB_TCPFASTOPENACTIVEFAIL,	/* TCPFastOpenActiveFail */
 	LINUX_MIB_TCPFASTOPENPASSIVE,		/* TCPFastOpenPassive*/
 	LINUX_MIB_TCPFASTOPENPASSIVEFAIL,	/* TCPFastOpenPassiveFail */
 	LINUX_MIB_TCPFASTOPENLISTENOVERFLOW,	/* TCPFastOpenListenOverflow */
@@ -262,6 +263,8 @@ enum
 	LINUX_MIB_TCPFROMZEROWINDOWADV,		/* TCPFromZeroWindowAdv */
 	LINUX_MIB_TCPTOZEROWINDOWADV,		/* TCPToZeroWindowAdv */
 	LINUX_MIB_TCPWANTZEROWINDOWADV,		/* TCPWantZeroWindowAdv */
+	LINUX_MIB_TCPSYNRETRANS,		/* TCPSynRetrans */
+	LINUX_MIB_TCPORIGDATASENT,		/* TCPOrigDataSent */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 99d2e9b6fac9..ad737fad6d8b 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
 	SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
 	SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
+	SNMP_MIB_ITEM("TCPFastOpenActiveFail", LINUX_MIB_TCPFASTOPENACTIVEFAIL),
 	SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
 	SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
 	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
@@ -283,6 +284,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPFromZeroWindowAdv", LINUX_MIB_TCPFROMZEROWINDOWADV),
 	SNMP_MIB_ITEM("TCPToZeroWindowAdv", LINUX_MIB_TCPTOZEROWINDOWADV),
 	SNMP_MIB_ITEM("TCPWantZeroWindowAdv", LINUX_MIB_TCPWANTZEROWINDOWADV),
+	SNMP_MIB_ITEM("TCPSynRetrans", LINUX_MIB_TCPSYNRETRANS),
+	SNMP_MIB_ITEM("TCPOrigDataSent", LINUX_MIB_TCPORIGDATASENT),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 23a41d978fad..6e4809389cbf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5393,9 +5393,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 				break;
 		}
 		tcp_rearm_rto(sk);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
 		return true;
 	}
 	tp->syn_data_acked = tp->syn_data;
+	if (tp->syn_data_acked)
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
 	return false;
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 17c0fb172fba..c4f1d9a76c44 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -854,8 +854,10 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
 	int res = tcp_v4_send_synack(sk, NULL, req, 0);
 
-	if (!res)
+	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+	}
 	return res;
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bf38b1fb63ab..aaa68f5b1055 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -86,6 +86,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		tcp_rearm_rto(sk);
 	}
+
+	NET_ADD_STATS_BH(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
+			 tcp_skb_pcount(skb));
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -2433,7 +2436,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (err == 0) {
 		/* Update global TCP statistics. */
 		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
-
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 		tp->total_retrans++;
 
 #if FASTRETRANS_DEBUG > 0
@@ -2958,7 +2962,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
 		tp->syn_data = (fo->copied > 0);
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
 	syn_data = NULL;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 64f0354c84c7..286227abed10 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -165,6 +165,9 @@ static int tcp_write_timeout(struct sock *sk)
 			dst_negative_advice(sk);
 			if (tp->syn_fastopen || tp->syn_data)
 				tcp_fastopen_cache_set(sk, 0, NULL, true);
+			if (tp->syn_data)
+				NET_INC_STATS_BH(sock_net(sk),
+						 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
 		}
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 		syn_set = true;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 889079b2ea85..3277680186b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -501,8 +501,10 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 	int res;
 
 	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
-	if (!res)
+	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+	}
 	return res;
 }
 
-- 
cgit v1.2.3


From b62faf3cdc875a1ac5a10696cf6ea0b12bab1596 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Mon, 3 Mar 2014 17:23:10 +0100
Subject: if_ether.h: add IEEE 802.21 Ethertype

Add the Ethertype for IEEE Std 802.21 - Media Independent Handover
Protocol. This Ethertype is used for network control messages.

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index ba8b15f07940..750ba67e0dc3 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -89,6 +89,7 @@
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
 #define ETH_P_TDLS	0x890D          /* TDLS */
 #define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
+#define ETH_P_80221	0x8917		/* IEEE 802.21 Media Independent Handover Protocol */
 #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
-- 
cgit v1.2.3


From f3baa393ffc9a7aefc0bf767729382085e81f606 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Mon, 3 Mar 2014 17:23:11 +0100
Subject: UAPI: add MPLS label stack definition

Labels for the Multiprotocol Label Switching are defined in RFC 3032
which was superseded by RFC 5462. Add the definition to UAPI and a stub
header for include/linux.

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mpls.h      |  6 ++++++
 include/uapi/linux/mpls.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 include/linux/mpls.h
 create mode 100644 include/uapi/linux/mpls.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/mpls.h b/include/linux/mpls.h
new file mode 100644
index 000000000000..9999145bc190
--- /dev/null
+++ b/include/linux/mpls.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_MPLS_H
+#define _LINUX_MPLS_H
+
+#include <uapi/linux/mpls.h>
+
+#endif  /* _LINUX_MPLS_H */
diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h
new file mode 100644
index 000000000000..bc9abfe88c9a
--- /dev/null
+++ b/include/uapi/linux/mpls.h
@@ -0,0 +1,34 @@
+#ifndef _UAPI_MPLS_H
+#define _UAPI_MPLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Reference: RFC 5462, RFC 3032
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                Label                  | TC  |S|       TTL     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *	Label:  Label Value, 20 bits
+ *	TC:     Traffic Class field, 3 bits
+ *	S:      Bottom of Stack, 1 bit
+ *	TTL:    Time to Live, 8 bits
+ */
+
+struct mpls_label {
+	__be32 entry;	/* one 32-bit label stack entry, laid out as in the diagram above */
+};
+
+#define MPLS_LS_LABEL_MASK      0xFFFFF000	/* Label: upper 20 bits */
+#define MPLS_LS_LABEL_SHIFT     12
+#define MPLS_LS_TC_MASK         0x00000E00	/* TC: 3 bits */
+#define MPLS_LS_TC_SHIFT        9
+#define MPLS_LS_S_MASK          0x00000100	/* S: bottom-of-stack bit */
+#define MPLS_LS_S_SHIFT         8
+#define MPLS_LS_TTL_MASK        0x000000FF	/* TTL: lowest 8 bits */
+#define MPLS_LS_TTL_SHIFT       0
+
+#endif /* _UAPI_MPLS_H */
-- 
cgit v1.2.3


From 948cd8c18c466fdcbe707bb2a42a148796bfccdd Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Oct 2013 17:19:29 +0200
Subject: usbfs: Add support for bulk stream ids

This patch makes it possible to specify a bulk stream id when submitting
an urb using the async usbfs API. It overloads the number_of_packets
usbdevfs_urb field for this. This is not pretty, but given other
constraints it is the best we can do. The reasoning leading to this goes
as follows:

1) We want to support bulk streams in the usbfs API
2) We do not want to extend the usbdevfs_urb struct with a new member, as
   that would mean defining new ioctl numbers for all async API ioctls +
   adding compat versions for the old ones (times 2 for 32 bit support)
3) 1 + 2 means we need to re-use an existing field
4) number_of_packets is only used for isoc urbs, and streams are bulk only
   so it is the best (and only) candidate for re-using

Note that:
1) This patch only uses number_of_packets as stream_id if the app has
   actually allocated streams on the ep, so that old apps which may have
   garbage in there (as it was unused until now in the bulk case), will not
   break
2) This patch does not add support for allocating / freeing bulk-streams, that
   is done in a follow up patch

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
---
 drivers/usb/core/devio.c          | 4 ++++
 include/uapi/linux/usbdevice_fs.h | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index c88d8bfaca8d..d7571a63181d 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1209,6 +1209,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 	unsigned int u, totlen, isofrmlen;
 	int i, ret, is_in, num_sgs = 0, ifnum = -1;
 	int number_of_packets = 0;
+	unsigned int stream_id = 0;
 	void *buf;
 
 	if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
@@ -1294,6 +1295,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 		num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE);
 		if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize)
 			num_sgs = 0;
+		if (ep->streams)
+			stream_id = uurb->stream_id;
 		break;
 
 	case USBDEVFS_URB_TYPE_INTERRUPT:
@@ -1444,6 +1447,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
 	dr = NULL;
 	as->urb->start_frame = uurb->start_frame;
 	as->urb->number_of_packets = number_of_packets;
+	as->urb->stream_id = stream_id;
 	if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
 			ps->dev->speed == USB_SPEED_HIGH)
 		as->urb->interval = 1 << min(15, ep->desc.bInterval - 1);
diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h
index 0c65e4b12617..cbf122db56bc 100644
--- a/include/uapi/linux/usbdevice_fs.h
+++ b/include/uapi/linux/usbdevice_fs.h
@@ -102,7 +102,10 @@ struct usbdevfs_urb {
 	int buffer_length;
 	int actual_length;
 	int start_frame;
-	int number_of_packets;
+	union {
+		int number_of_packets;	/* Only used for isoc urbs */
+		unsigned int stream_id;	/* Only used with bulk streams */
+	};
 	int error_count;
 	unsigned int signr;	/* signal to be sent on completion,
 				  or 0 if none should be sent. */
-- 
cgit v1.2.3


From bcf7f6e39335af4f03da8c26a98185fd49754fcc Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 9 Oct 2013 17:19:31 +0200
Subject: usbfs: Add support for allocating / freeing streams

This allows userspace to use bulk-streams, just like in kernel drivers, see
Documentation/usb/bulk-streams.txt for details on the in kernel API. This
is exported pretty much one on one to userspace.

To use streams an app must first make a USBDEVFS_ALLOC_STREAMS ioctl,
on success this will return the number of streams available (which may be
less than requested). If there are n streams the app can then submit
usbdevfs_urb-s with their stream_id member set to 1-n to use a specific
stream. IE if USBDEVFS_ALLOC_STREAMS returns 4 then stream_id 1-4 can be
used.

When the app is done using streams it should call USBDEVFS_FREE_STREAMS

Note applications are advised to use libusb rather than using the
usbdevfs api directly. The latest version of libusb has support for streams.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
---
 drivers/usb/core/devio.c          | 118 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/usbdevice_fs.h |   7 +++
 2 files changed, 125 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 502974b4deb5..12401ee4ba0e 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -778,6 +778,79 @@ static struct usb_host_endpoint *ep_to_host_endpoint(struct usb_device *dev,
 		return dev->ep_out[ep & USB_ENDPOINT_NUMBER_MASK];
 }
 
+static int parse_usbdevfs_streams(struct dev_state *ps,
+				  struct usbdevfs_streams __user *streams,
+				  unsigned int *num_streams_ret, /* may be NULL: stream count is then neither checked nor returned */
+				  unsigned int *num_eps_ret,
+				  struct usb_host_endpoint ***eps_ret, /* kmalloc'ed array; callers in this patch kfree() it */
+				  struct usb_interface **intf_ret)
+{
+	unsigned int i, num_streams, num_eps;
+	struct usb_host_endpoint **eps;
+	struct usb_interface *intf = NULL;
+	unsigned char ep;
+	int ifnum, ret;
+	/* Validate a userspace usbdevfs_streams request and resolve its endpoints and their interface. */
+	if (get_user(num_streams, &streams->num_streams) ||
+	    get_user(num_eps, &streams->num_eps))
+		return -EFAULT;
+
+	if (num_eps < 1 || num_eps > USB_MAXENDPOINTS)
+		return -EINVAL;
+
+	/* The XHCI controller allows max 2 ^ 16 streams; fewer than 2 is rejected too */
+	if (num_streams_ret && (num_streams < 2 || num_streams > 65536))
+		return -EINVAL;
+
+	eps = kmalloc(num_eps * sizeof(*eps), GFP_KERNEL); /* num_eps was bounded by USB_MAXENDPOINTS above */
+	if (!eps)
+		return -ENOMEM;
+
+	for (i = 0; i < num_eps; i++) {
+		if (get_user(ep, &streams->eps[i])) { /* one byte per endpoint from the user array */
+			ret = -EFAULT;
+			goto error;
+		}
+		eps[i] = ep_to_host_endpoint(ps->dev, ep);
+		if (!eps[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+
+		/* usb_alloc/free_streams operate on an usb_interface */
+		ifnum = findintfep(ps->dev, ep);
+		if (ifnum < 0) {
+			ret = ifnum; /* propagate the negative value from findintfep() */
+			goto error;
+		}
+
+		if (i == 0) { /* the first endpoint determines the interface */
+			ret = checkintf(ps, ifnum);
+			if (ret < 0)
+				goto error;
+			intf = usb_ifnum_to_if(ps->dev, ifnum);
+		} else {
+			/* Verify all eps belong to the same interface */
+			if (ifnum != intf->altsetting->desc.bInterfaceNumber) {
+				ret = -EINVAL;
+				goto error;
+			}
+		}
+	}
+
+	if (num_streams_ret)
+		*num_streams_ret = num_streams;
+	*num_eps_ret = num_eps;
+	*eps_ret = eps; /* the array is handed to the caller on success */
+	*intf_ret = intf;
+
+	return 0;
+
+error: /* failure after allocation: free the endpoint array and return the error code */
+	kfree(eps);
+	return ret;
+}
+
 static int match_devt(struct device *dev, void *data)
 {
 	return dev->devt == (dev_t) (unsigned long) data;
@@ -2009,6 +2082,45 @@ static int proc_disconnect_claim(struct dev_state *ps, void __user *arg)
 	return claimintf(ps, dc.interface);
 }
 
+static int proc_alloc_streams(struct dev_state *ps, void __user *arg)
+{
+	unsigned num_streams, num_eps;
+	struct usb_host_endpoint **eps; /* allocated by parse_usbdevfs_streams(), freed below */
+	struct usb_interface *intf;
+	int r;
+	/* USBDEVFS_ALLOC_STREAMS handler: parse the request, then call usb_alloc_streams(). */
+	r = parse_usbdevfs_streams(ps, arg, &num_streams, &num_eps,
+				   &eps, &intf);
+	if (r)
+		return r;
+
+	destroy_async_on_interface(ps, /* NOTE(review): presumably cancels pending async URBs on this interface; confirm */
+				   intf->altsetting[0].desc.bInterfaceNumber);
+
+	r = usb_alloc_streams(intf, eps, num_eps, num_streams, GFP_KERNEL);
+	kfree(eps); /* freed on both success and failure */
+	return r; /* usb_alloc_streams() result, passed through unchanged */
+}
+
+static int proc_free_streams(struct dev_state *ps, void __user *arg)
+{
+	unsigned num_eps;
+	struct usb_host_endpoint **eps; /* allocated by parse_usbdevfs_streams(), freed below */
+	struct usb_interface *intf;
+	int r;
+	/* USBDEVFS_FREE_STREAMS handler; NULL below makes the parser skip the num_streams range check. */
+	r = parse_usbdevfs_streams(ps, arg, NULL, &num_eps, &eps, &intf);
+	if (r)
+		return r;
+
+	destroy_async_on_interface(ps, /* NOTE(review): presumably cancels pending async URBs on this interface; confirm */
+				   intf->altsetting[0].desc.bInterfaceNumber);
+
+	r = usb_free_streams(intf, eps, num_eps, GFP_KERNEL);
+	kfree(eps); /* freed on both success and failure */
+	return r; /* usb_free_streams() result, passed through unchanged */
+}
+
 /*
  * NOTE:  All requests here that have interface numbers as parameters
  * are assuming that somehow the configuration has been prevented from
@@ -2185,6 +2297,12 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 	case USBDEVFS_DISCONNECT_CLAIM:
 		ret = proc_disconnect_claim(ps, p);
 		break;
+	case USBDEVFS_ALLOC_STREAMS:
+		ret = proc_alloc_streams(ps, p);
+		break;
+	case USBDEVFS_FREE_STREAMS:
+		ret = proc_free_streams(ps, p);
+		break;
 	}
 	usb_unlock_device(dev);
 	if (ret >= 0)
diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h
index cbf122db56bc..abe5f4bd4d82 100644
--- a/include/uapi/linux/usbdevice_fs.h
+++ b/include/uapi/linux/usbdevice_fs.h
@@ -147,6 +147,11 @@ struct usbdevfs_disconnect_claim {
 	char driver[USBDEVFS_MAXDRIVERNAME + 1];
 };
 
+struct usbdevfs_streams {
+	unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
+	unsigned int num_eps;	/* number of entries in eps[] */
+	unsigned char eps[0];	/* endpoint addresses, one byte each, following the header */
+};
 
 #define USBDEVFS_CONTROL           _IOWR('U', 0, struct usbdevfs_ctrltransfer)
 #define USBDEVFS_CONTROL32           _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
@@ -179,5 +184,7 @@ struct usbdevfs_disconnect_claim {
 #define USBDEVFS_RELEASE_PORT      _IOR('U', 25, unsigned int)
 #define USBDEVFS_GET_CAPABILITIES  _IOR('U', 26, __u32)
 #define USBDEVFS_DISCONNECT_CLAIM  _IOR('U', 27, struct usbdevfs_disconnect_claim)
+#define USBDEVFS_ALLOC_STREAMS     _IOR('U', 28, struct usbdevfs_streams)
+#define USBDEVFS_FREE_STREAMS      _IOR('U', 29, struct usbdevfs_streams)
 
 #endif /* _UAPI_LINUX_USBDEVICE_FS_H */
-- 
cgit v1.2.3


From 84099a282e1ba0facf7eee5108031a8f38f4b4fe Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Wed, 11 Dec 2013 20:24:02 -0300
Subject: [media] v4l: add new tuner types for SDR

Define tuner types V4L2_TUNER_ADC and V4L2_TUNER_RF for SDR usage.

ADC is used for setting sampling rate (sampling frequency) to SDR
device.

Another tuner type, named V4L2_TUNER_RF, is for a possible RF tuner.
It is used to down-convert the RF frequency to a range the ADC can sample.
Having RF tuner is optional, whilst in practice it is almost always
there.

Also add checks to VIDIOC_G_FREQUENCY, VIDIOC_S_FREQUENCY and
VIDIOC_ENUM_FREQ_BANDS only allow these two tuner types when device
type is SDR (VFL_TYPE_SDR). For VIDIOC_G_FREQUENCY we do not check
tuner type, instead override type with V4L2_TUNER_ADC in every
case (requested by Hans in order to keep functionality in line with
existing tuners and existing API does not specify it).

Prohibit VIDIOC_S_HW_FREQ_SEEK explicitly when device type is SDR,
as device cannot do hardware seek without a hardware demodulator.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 39 ++++++++++++++++++++++++++----------
 include/uapi/linux/videodev2.h       |  2 ++
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 707aef705a47..15ab34968fb0 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1291,8 +1291,11 @@ static int v4l_g_frequency(const struct v4l2_ioctl_ops *ops,
 	struct video_device *vfd = video_devdata(file);
 	struct v4l2_frequency *p = arg;
 
-	p->type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
-			V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
+	if (vfd->vfl_type == VFL_TYPE_SDR)
+		p->type = V4L2_TUNER_ADC;
+	else
+		p->type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
+				V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
 	return ops->vidioc_g_frequency(file, fh, p);
 }
 
@@ -1303,10 +1306,15 @@ static int v4l_s_frequency(const struct v4l2_ioctl_ops *ops,
 	const struct v4l2_frequency *p = arg;
 	enum v4l2_tuner_type type;
 
-	type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
-			V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
-	if (p->type != type)
-		return -EINVAL;
+	if (vfd->vfl_type == VFL_TYPE_SDR) {
+		if (p->type != V4L2_TUNER_ADC && p->type != V4L2_TUNER_RF)
+			return -EINVAL;
+	} else {
+		type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
+				V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
+		if (type != p->type)
+			return -EINVAL;
+	}
 	return ops->vidioc_s_frequency(file, fh, p);
 }
 
@@ -1386,6 +1394,10 @@ static int v4l_s_hw_freq_seek(const struct v4l2_ioctl_ops *ops,
 	struct v4l2_hw_freq_seek *p = arg;
 	enum v4l2_tuner_type type;
 
+	/* s_hw_freq_seek is not supported for SDR for now */
+	if (vfd->vfl_type == VFL_TYPE_SDR)
+		return -EINVAL;
+
 	type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
 		V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
 	if (p->type != type)
@@ -1885,11 +1897,16 @@ static int v4l_enum_freq_bands(const struct v4l2_ioctl_ops *ops,
 	enum v4l2_tuner_type type;
 	int err;
 
-	type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
-			V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
-
-	if (type != p->type)
-		return -EINVAL;
+	if (vfd->vfl_type == VFL_TYPE_SDR) {
+		if (p->type != V4L2_TUNER_ADC && p->type != V4L2_TUNER_RF)
+			return -EINVAL;
+		type = p->type;
+	} else {
+		type = (vfd->vfl_type == VFL_TYPE_RADIO) ?
+				V4L2_TUNER_RADIO : V4L2_TUNER_ANALOG_TV;
+		if (type != p->type)
+			return -EINVAL;
+	}
 	if (ops->vidioc_enum_freq_bands)
 		return ops->vidioc_enum_freq_bands(file, fh, p);
 	if (is_valid_ioctl(vfd, VIDIOC_G_TUNER)) {
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 6ae7bbe988cc..9dc79d104d65 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -159,6 +159,8 @@ enum v4l2_tuner_type {
 	V4L2_TUNER_RADIO	     = 1,
 	V4L2_TUNER_ANALOG_TV	     = 2,
 	V4L2_TUNER_DIGITAL_TV	     = 3,
+	V4L2_TUNER_ADC               = 4,
+	V4L2_TUNER_RF                = 5,
 };
 
 enum v4l2_memory {
-- 
cgit v1.2.3


From 67f9a11759d7d382d79774cf16c0bf6cb87dc4f0 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Wed, 11 Dec 2013 20:27:31 -0300
Subject: [media] v4l: 1 Hz resolution flag for tuners

Add V4L2_TUNER_CAP_1HZ for 1 Hz resolution.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9dc79d104d65..1cf20765482e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1341,6 +1341,7 @@ struct v4l2_modulator {
 #define V4L2_TUNER_CAP_RDS_CONTROLS	0x0200
 #define V4L2_TUNER_CAP_FREQ_BANDS	0x0400
 #define V4L2_TUNER_CAP_HWSEEK_PROG_LIM	0x0800
+#define V4L2_TUNER_CAP_1HZ		0x1000
 
 /*  Flags for the 'rxsubchans' field */
 #define V4L2_TUNER_SUB_MONO		0x0001
-- 
cgit v1.2.3


From 6f3073b8dcdcf7a0eabbdd60ec3706bb9d4a131c Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Thu, 12 Dec 2013 13:34:30 -0300
Subject: [media] v4l: add stream format for SDR receiver

Add new V4L2 stream format definition, V4L2_BUF_TYPE_SDR_CAPTURE,
for SDR receiver.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ioctl.c |  1 +
 include/trace/events/v4l2.h          |  1 +
 include/uapi/linux/videodev2.h       | 11 +++++++++++
 3 files changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 15ab34968fb0..9a2acaf75426 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -152,6 +152,7 @@ const char *v4l2_type_names[] = {
 	[V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY] = "vid-out-overlay",
 	[V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE] = "vid-cap-mplane",
 	[V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE] = "vid-out-mplane",
+	[V4L2_BUF_TYPE_SDR_CAPTURE]        = "sdr-cap",
 };
 EXPORT_SYMBOL(v4l2_type_names);
 
diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
index ef94ecad1c94..b9bb1f204693 100644
--- a/include/trace/events/v4l2.h
+++ b/include/trace/events/v4l2.h
@@ -18,6 +18,7 @@
 		{ V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY, "VIDEO_OUTPUT_OVERLAY" },\
 		{ V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE, "VIDEO_CAPTURE_MPLANE" },\
 		{ V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE,  "VIDEO_OUTPUT_MPLANE" }, \
+		{ V4L2_BUF_TYPE_SDR_CAPTURE,          "SDR_CAPTURE" },         \
 		{ V4L2_BUF_TYPE_PRIVATE,	      "PRIVATE" })
 
 #define show_field(field)						\
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 1cf20765482e..27bed7c99b46 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -139,6 +139,7 @@ enum v4l2_buf_type {
 #endif
 	V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE = 9,
 	V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE  = 10,
+	V4L2_BUF_TYPE_SDR_CAPTURE          = 11,
 	/* Deprecated, do not use */
 	V4L2_BUF_TYPE_PRIVATE              = 0x80,
 };
@@ -1694,6 +1695,15 @@ struct v4l2_pix_format_mplane {
 	__u8				reserved[11];
 } __attribute__ ((packed));
 
+/**
+ * struct v4l2_format_sdr - SDR format definition
+ * @pixelformat:	little endian four character code (fourcc)
+ */
+struct v4l2_format_sdr {
+	__u32				pixelformat;
+	__u8				reserved[28];	/* brings the packed struct to 32 bytes; presumably for future extension */
+} __attribute__ ((packed));
+
 /**
  * struct v4l2_format - stream data format
  * @type:	enum v4l2_buf_type; type of the data stream
@@ -1712,6 +1722,7 @@ struct v4l2_format {
 		struct v4l2_window		win;     /* V4L2_BUF_TYPE_VIDEO_OVERLAY */
 		struct v4l2_vbi_format		vbi;     /* V4L2_BUF_TYPE_VBI_CAPTURE */
 		struct v4l2_sliced_vbi_format	sliced;  /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */
+		struct v4l2_format_sdr		sdr;     /* V4L2_BUF_TYPE_SDR_CAPTURE */
 		__u8	raw_data[200];                   /* user-defined */
 	} fmt;
 };
-- 
cgit v1.2.3


From c9c54f72d4a8fe02a94c345dde45f42628325223 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Tue, 17 Dec 2013 17:41:12 -0300
Subject: [media] v4l: add device capability flag for SDR receiver

VIDIOC_QUERYCAP IOCTL is used to query device capabilities. Add a new
capability flag to indicate that a given device supports SDR capture.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Acked-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 27bed7c99b46..27fedfe4f33b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -267,6 +267,8 @@ struct v4l2_capability {
 #define V4L2_CAP_RADIO			0x00040000  /* is a radio device */
 #define V4L2_CAP_MODULATOR		0x00080000  /* has a modulator */
 
+#define V4L2_CAP_SDR_CAPTURE		0x00100000  /* Is a SDR capture device */
+
 #define V4L2_CAP_READWRITE              0x01000000  /* read/write systemcalls */
 #define V4L2_CAP_ASYNCIO                0x02000000  /* async I/O */
 #define V4L2_CAP_STREAMING              0x04000000  /* streaming I/O ioctls */
-- 
cgit v1.2.3


From 80807fada4398d11ebd2bb28b3b49ca6a59e1260 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 24 Jan 2014 23:44:26 -0300
Subject: [media] v4l: add RF tuner gain controls

Modern silicon RF tuners used nowadays have many controllable gain
stages on the signal path. Usually, but not always, there are at least
3 gain stages. Also, in some cases there could be multiple gain
stages within the ones specified here. However, I think that having
these three controllable gain stages offers enough fine-tuning for
real use cases.

1) LNA gain. That is the first gain stage, just after the antenna input.
2) Mixer gain. It is located roughly in the middle of the signal path,
where the RF signal is down-converted to IF/BB.
3) IF gain. That is the last gain stage, used to adjust the output signal
level to the optimal level for the receiving party (usually demodulator ADC).

Each gain stage can often be set to either manual or automatic
(AGC) mode. Because of that, add separate controls for controlling
the operation mode.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 15 +++++++++++++++
 include/uapi/linux/v4l2-controls.h   | 11 +++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index e9e12c48c874..1168f683fd48 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -859,6 +859,14 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_FM_RX_CLASS:		return "FM Radio Receiver Controls";
 	case V4L2_CID_TUNE_DEEMPHASIS:		return "De-Emphasis";
 	case V4L2_CID_RDS_RECEPTION:		return "RDS Reception";
+
+	case V4L2_CID_RF_TUNER_CLASS:		return "RF Tuner Controls";
+	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:	return "LNA Gain, Auto";
+	case V4L2_CID_RF_TUNER_LNA_GAIN:	return "LNA Gain";
+	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:	return "Mixer Gain, Auto";
+	case V4L2_CID_RF_TUNER_MIXER_GAIN:	return "Mixer Gain";
+	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:	return "IF Gain, Auto";
+	case V4L2_CID_RF_TUNER_IF_GAIN:		return "IF Gain";
 	default:
 		return NULL;
 	}
@@ -908,6 +916,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_WIDE_DYNAMIC_RANGE:
 	case V4L2_CID_IMAGE_STABILIZATION:
 	case V4L2_CID_RDS_RECEPTION:
+	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -997,6 +1008,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_IMAGE_PROC_CLASS:
 	case V4L2_CID_DV_CLASS:
 	case V4L2_CID_FM_RX_CLASS:
+	case V4L2_CID_RF_TUNER_CLASS:
 		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
 		/* You can neither read not write these */
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
@@ -1069,6 +1081,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_PILOT_TONE_FREQUENCY:
 	case V4L2_CID_TUNE_POWER_LEVEL:
 	case V4L2_CID_TUNE_ANTENNA_CAPACITOR:
+	case V4L2_CID_RF_TUNER_LNA_GAIN:
+	case V4L2_CID_RF_TUNER_MIXER_GAIN:
+	case V4L2_CID_RF_TUNER_IF_GAIN:
 		*flags |= V4L2_CTRL_FLAG_SLIDER;
 		break;
 	case V4L2_CID_PAN_RELATIVE:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index cda6fa0262fc..e97101c1686f 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -60,6 +60,7 @@
 #define V4L2_CTRL_CLASS_IMAGE_PROC	0x009f0000	/* Image processing controls */
 #define V4L2_CTRL_CLASS_DV		0x00a00000	/* Digital Video controls */
 #define V4L2_CTRL_CLASS_FM_RX		0x00a10000	/* FM Receiver controls */
+#define V4L2_CTRL_CLASS_RF_TUNER	0x00a20000	/* RF tuner controls */
 
 /* User-class control IDs */
 
@@ -897,4 +898,14 @@ enum v4l2_deemphasis {
 
 #define V4L2_CID_RDS_RECEPTION			(V4L2_CID_FM_RX_CLASS_BASE + 2)
 
+#define V4L2_CID_RF_TUNER_CLASS_BASE		(V4L2_CTRL_CLASS_RF_TUNER | 0x900)
+#define V4L2_CID_RF_TUNER_CLASS			(V4L2_CTRL_CLASS_RF_TUNER | 1)
+
+#define V4L2_CID_RF_TUNER_LNA_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 1)
+#define V4L2_CID_RF_TUNER_LNA_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 2)
+#define V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 3)
+#define V4L2_CID_RF_TUNER_MIXER_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 4)
+#define V4L2_CID_RF_TUNER_IF_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 5)
+#define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 6)
+
 #endif
-- 
cgit v1.2.3


From 939f1377fbdb5d0d6d6ee1e234b8ab9328ca77ef Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Sun, 25 Aug 2013 14:00:43 -0300
Subject: [media] v4l: Use full 32 bits for buffer flags

The buffer flags field is 32 bits but the defines only use 16. This is
fine, but as more than 16 bits will be used in the very near future, define
them as 32-bit numbers for consistency.

Signed-off-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/io.xml | 30 +++++++++++++--------------
 include/uapi/linux/videodev2.h         | 38 ++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 30 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
index 89544e4495a9..5a2e97bbc6bf 100644
--- a/Documentation/DocBook/media/v4l/io.xml
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -990,7 +990,7 @@ should set this to 0.</entry>
 	<tbody valign="top">
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_MAPPED</constant></entry>
-	    <entry>0x0001</entry>
+	    <entry>0x00000001</entry>
 	    <entry>The buffer resides in device memory and has been mapped
 into the application's address space, see <xref linkend="mmap" /> for details.
 Drivers set or clear this flag when the
@@ -1000,7 +1000,7 @@ Drivers set or clear this flag when the
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_QUEUED</constant></entry>
-	    <entry>0x0002</entry>
+	    <entry>0x00000002</entry>
 	  <entry>Internally drivers maintain two buffer queues, an
 incoming and outgoing queue. When this flag is set, the buffer is
 currently on the incoming queue. It automatically moves to the
@@ -1013,7 +1013,7 @@ cleared.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_DONE</constant></entry>
-	    <entry>0x0004</entry>
+	    <entry>0x00000004</entry>
 	    <entry>When this flag is set, the buffer is currently on
 the outgoing queue, ready to be dequeued from the driver. Drivers set
 or clear this flag when the <constant>VIDIOC_QUERYBUF</constant> ioctl
@@ -1027,7 +1027,7 @@ state, in the application domain to say so.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_ERROR</constant></entry>
-	    <entry>0x0040</entry>
+	    <entry>0x00000040</entry>
 	    <entry>When this flag is set, the buffer has been dequeued
 	    successfully, although the data might have been corrupted.
 	    This is recoverable, streaming may continue as normal and
@@ -1037,7 +1037,7 @@ state, in the application domain to say so.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_KEYFRAME</constant></entry>
-	    <entry>0x0008</entry>
+	    <entry>0x00000008</entry>
 	  <entry>Drivers set or clear this flag when calling the
 <constant>VIDIOC_DQBUF</constant> ioctl. It may be set by video
 capture devices when the buffer contains a compressed image which is a
@@ -1045,27 +1045,27 @@ key frame (or field), &ie; can be decompressed on its own.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_PFRAME</constant></entry>
-	    <entry>0x0010</entry>
+	    <entry>0x00000010</entry>
 	    <entry>Similar to <constant>V4L2_BUF_FLAG_KEYFRAME</constant>
 this flags predicted frames or fields which contain only differences to a
 previous key frame.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_BFRAME</constant></entry>
-	    <entry>0x0020</entry>
+	    <entry>0x00000020</entry>
 	    <entry>Similar to <constant>V4L2_BUF_FLAG_PFRAME</constant>
 	this is a bidirectional predicted frame or field. [ooc tbd]</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_TIMECODE</constant></entry>
-	    <entry>0x0100</entry>
+	    <entry>0x00000100</entry>
 	    <entry>The <structfield>timecode</structfield> field is valid.
 Drivers set or clear this flag when the <constant>VIDIOC_DQBUF</constant>
 ioctl is called.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_PREPARED</constant></entry>
-	    <entry>0x0400</entry>
+	    <entry>0x00000400</entry>
 	    <entry>The buffer has been prepared for I/O and can be queued by the
 application. Drivers set or clear this flag when the
 <link linkend="vidioc-querybuf">VIDIOC_QUERYBUF</link>, <link
@@ -1075,7 +1075,7 @@ application. Drivers set or clear this flag when the
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
-	    <entry>0x0800</entry>
+	    <entry>0x00000800</entry>
 	    <entry>Caches do not have to be invalidated for this buffer.
 Typically applications shall use this flag if the data captured in the buffer
 is not going to be touched by the CPU, instead the buffer will, probably, be
@@ -1084,7 +1084,7 @@ passed on to a DMA-capable hardware unit for further processing or output.
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
-	    <entry>0x1000</entry>
+	    <entry>0x00001000</entry>
 	    <entry>Caches do not have to be cleaned for this buffer.
 Typically applications shall use this flag for output buffers if the data
 in this buffer has not been created by the CPU but by some DMA-capable unit,
@@ -1092,7 +1092,7 @@ in which case caches have not been used.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_MASK</constant></entry>
-	    <entry>0xe000</entry>
+	    <entry>0x0000e000</entry>
 	    <entry>Mask for timestamp types below. To test the
 	    timestamp type, mask out bits not belonging to timestamp
 	    type by performing a logical and operation with buffer
@@ -1100,7 +1100,7 @@ in which case caches have not been used.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN</constant></entry>
-	    <entry>0x0000</entry>
+	    <entry>0x00000000</entry>
 	    <entry>Unknown timestamp type. This type is used by
 	    drivers before Linux 3.9 and may be either monotonic (see
 	    below) or realtime (wall clock). Monotonic clock has been
@@ -1113,7 +1113,7 @@ in which case caches have not been used.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC</constant></entry>
-	    <entry>0x2000</entry>
+	    <entry>0x00002000</entry>
 	    <entry>The buffer timestamp has been taken from the
 	    <constant>CLOCK_MONOTONIC</constant> clock. To access the
 	    same clock outside V4L2, use
@@ -1121,7 +1121,7 @@ in which case caches have not been used.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant></entry>
-	    <entry>0x4000</entry>
+	    <entry>0x00004000</entry>
 	    <entry>The CAPTURE buffer timestamp has been taken from the
 	    corresponding OUTPUT buffer. This flag applies only to mem2mem devices.</entry>
 	  </row>
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 27fedfe4f33b..cb838765dd40 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -674,24 +674,32 @@ struct v4l2_buffer {
 };
 
 /*  Flags for 'flags' field */
-#define V4L2_BUF_FLAG_MAPPED	0x0001  /* Buffer is mapped (flag) */
-#define V4L2_BUF_FLAG_QUEUED	0x0002	/* Buffer is queued for processing */
-#define V4L2_BUF_FLAG_DONE	0x0004	/* Buffer is ready */
-#define V4L2_BUF_FLAG_KEYFRAME	0x0008	/* Image is a keyframe (I-frame) */
-#define V4L2_BUF_FLAG_PFRAME	0x0010	/* Image is a P-frame */
-#define V4L2_BUF_FLAG_BFRAME	0x0020	/* Image is a B-frame */
+/* Buffer is mapped (flag) */
+#define V4L2_BUF_FLAG_MAPPED			0x00000001
+/* Buffer is queued for processing */
+#define V4L2_BUF_FLAG_QUEUED			0x00000002
+/* Buffer is ready */
+#define V4L2_BUF_FLAG_DONE			0x00000004
+/* Image is a keyframe (I-frame) */
+#define V4L2_BUF_FLAG_KEYFRAME			0x00000008
+/* Image is a P-frame */
+#define V4L2_BUF_FLAG_PFRAME			0x00000010
+/* Image is a B-frame */
+#define V4L2_BUF_FLAG_BFRAME			0x00000020
 /* Buffer is ready, but the data contained within is corrupted. */
-#define V4L2_BUF_FLAG_ERROR	0x0040
-#define V4L2_BUF_FLAG_TIMECODE	0x0100	/* timecode field is valid */
-#define V4L2_BUF_FLAG_PREPARED	0x0400	/* Buffer is prepared for queuing */
+#define V4L2_BUF_FLAG_ERROR			0x00000040
+/* timecode field is valid */
+#define V4L2_BUF_FLAG_TIMECODE			0x00000100
+/* Buffer is prepared for queuing */
+#define V4L2_BUF_FLAG_PREPARED			0x00000400
 /* Cache handling flags */
-#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	0x0800
-#define V4L2_BUF_FLAG_NO_CACHE_CLEAN		0x1000
+#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	0x00000800
+#define V4L2_BUF_FLAG_NO_CACHE_CLEAN		0x00001000
 /* Timestamp type */
-#define V4L2_BUF_FLAG_TIMESTAMP_MASK		0xe000
-#define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN		0x0000
-#define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC	0x2000
-#define V4L2_BUF_FLAG_TIMESTAMP_COPY		0x4000
+#define V4L2_BUF_FLAG_TIMESTAMP_MASK		0x0000e000
+#define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN		0x00000000
+#define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC	0x00002000
+#define V4L2_BUF_FLAG_TIMESTAMP_COPY		0x00004000
 
 /**
  * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor
-- 
cgit v1.2.3


From 872484ce40881e295b046adf21f7211306477751 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Sun, 25 Aug 2013 17:57:03 -0300
Subject: [media] v4l: Add timestamp source flags, mask and document them

Some devices do not produce timestamps that correspond to the end of the
frame. The user space should be informed on the matter. This patch achieves
that by adding buffer flags (and a mask) for timestamp sources since more
possible timestamping points are expected than just two.

A three-bit mask is defined (V4L2_BUF_FLAG_TSTAMP_SRC_MASK) and two of the
eight possible values are defined: V4L2_BUF_FLAG_TSTAMP_SRC_EOF for end of
frame (value zero) and V4L2_BUF_FLAG_TSTAMP_SRC_SOE for start of exposure
(next value).

Signed-off-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Kamil Debski <k.debski@samsung.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/io.xml   | 36 ++++++++++++++++++++++++++------
 drivers/media/v4l2-core/videobuf2-core.c |  4 +++-
 include/media/videobuf2-core.h           |  2 ++
 include/uapi/linux/videodev2.h           |  4 ++++
 4 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
index 5a2e97bbc6bf..1e7ea3c2e2ad 100644
--- a/Documentation/DocBook/media/v4l/io.xml
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -653,12 +653,6 @@ plane, are stored in struct <structname>v4l2_plane</structname> instead.
 In that case, struct <structname>v4l2_buffer</structname> contains an array of
 plane structures.</para>
 
-      <para>For timestamp types that are sampled from the system clock
-(V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC) it is guaranteed that the timestamp is
-taken after the complete frame has been received (or transmitted in
-case of video output devices). For other kinds of
-timestamps this may vary depending on the driver.</para>
-
     <table frame="none" pgwide="1" id="v4l2-buffer">
       <title>struct <structname>v4l2_buffer</structname></title>
       <tgroup cols="4">
@@ -1125,6 +1119,36 @@ in which case caches have not been used.</entry>
 	    <entry>The CAPTURE buffer timestamp has been taken from the
 	    corresponding OUTPUT buffer. This flag applies only to mem2mem devices.</entry>
 	  </row>
+	  <row>
+	    <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_MASK</constant></entry>
+	    <entry>0x00070000</entry>
+	    <entry>Mask for timestamp sources below. The timestamp source
+	    defines the point of time the timestamp is taken in relation to
+	    the frame. Logical 'and' operation between the
+	    <structfield>flags</structfield> field and
+	    <constant>V4L2_BUF_FLAG_TSTAMP_SRC_MASK</constant> produces the
+	    value of the timestamp source.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_EOF</constant></entry>
+	    <entry>0x00000000</entry>
+	    <entry>End Of Frame. The buffer timestamp has been taken
+	    when the last pixel of the frame has been received or the
+	    last pixel of the frame has been transmitted. In practice,
+	    software generated timestamps will typically be read from
+	    the clock a small amount of time after the last pixel has
+	    been received or transmitten, depending on the system and
+	    other activity in it.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_SOE</constant></entry>
+	    <entry>0x00010000</entry>
+	    <entry>Start Of Exposure. The buffer timestamp has been
+	    taken when the exposure of the frame has begun. This is
+	    only valid for the
+	    <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> buffer
+	    type.</entry>
+	  </row>
 	</tbody>
       </tgroup>
     </table>
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 521350a74c46..42a856813d06 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -2227,7 +2227,9 @@ int vb2_queue_init(struct vb2_queue *q)
 	    WARN_ON(!q->io_modes)	  ||
 	    WARN_ON(!q->ops->queue_setup) ||
 	    WARN_ON(!q->ops->buf_queue)   ||
-	    WARN_ON(q->timestamp_flags & ~V4L2_BUF_FLAG_TIMESTAMP_MASK))
+	    WARN_ON(q->timestamp_flags &
+		    ~(V4L2_BUF_FLAG_TIMESTAMP_MASK |
+		      V4L2_BUF_FLAG_TSTAMP_SRC_MASK)))
 		return -EINVAL;
 
 	/* Warn that the driver should choose an appropriate timestamp type */
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 3770be6e972d..bf6859ee46c3 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -312,6 +312,8 @@ struct v4l2_fh;
  * @buf_struct_size: size of the driver-specific buffer structure;
  *		"0" indicates the driver doesn't want to use a custom buffer
  *		structure type, so sizeof(struct vb2_buffer) will is used
+ * @timestamp_flags: Timestamp flags; V4L2_BUF_FLAGS_TIMESTAMP_* and
+ *		V4L2_BUF_FLAGS_TSTAMP_SRC_*
  * @gfp_flags:	additional gfp flags used when allocating the buffers.
  *		Typically this is 0, but it may be e.g. GFP_DMA or __GFP_DMA32
  *		to force the buffer allocation to a specific memory zone.
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index cb838765dd40..17acba8c7f9f 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -700,6 +700,10 @@ struct v4l2_buffer {
 #define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN		0x00000000
 #define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC	0x00002000
 #define V4L2_BUF_FLAG_TIMESTAMP_COPY		0x00004000
+/* Timestamp sources. */
+#define V4L2_BUF_FLAG_TSTAMP_SRC_MASK		0x00070000
+#define V4L2_BUF_FLAG_TSTAMP_SRC_EOF		0x00000000
+#define V4L2_BUF_FLAG_TSTAMP_SRC_SOE		0x00010000
 
 /**
  * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor
-- 
cgit v1.2.3


From 8d4e897bd0150fab594a871484e554472ee01452 Mon Sep 17 00:00:00 2001
From: Manu Gautam <mgautam@codeaurora.org>
Date: Fri, 28 Feb 2014 16:50:22 +0530
Subject: usb: gadget: f_fs: Add support for SuperSpeed Mode

Allow userspace to pass SuperSpeed descriptors and
handle them in the driver accordingly.
This change doesn't modify existing desc_header and thereby
keeps the ABI changes backward compatible i.e. existing
userspace drivers compiled with old header (functionfs.h)
would continue to work with the updated kernel.

Signed-off-by: Manu Gautam <mgautam@codeaurora.org>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_fs.c           | 182 +++++++++++++++++++++++++++---------
 drivers/usb/gadget/u_fs.h           |  10 +-
 include/uapi/linux/usb/functionfs.h |   5 +
 3 files changed, 147 insertions(+), 50 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index 1ae741fdace0..66f60b9a34a2 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -134,8 +134,8 @@ struct ffs_ep {
 	struct usb_ep			*ep;	/* P: ffs->eps_lock */
 	struct usb_request		*req;	/* P: epfile->mutex */
 
-	/* [0]: full speed, [1]: high speed */
-	struct usb_endpoint_descriptor	*descs[2];
+	/* [0]: full speed, [1]: high speed, [2]: super speed */
+	struct usb_endpoint_descriptor	*descs[3];
 
 	u8				num;
 
@@ -1450,10 +1450,11 @@ static void ffs_data_reset(struct ffs_data *ffs)
 	ffs->raw_strings = NULL;
 	ffs->stringtabs = NULL;
 
-	ffs->raw_descs_length = 0;
-	ffs->raw_fs_descs_length = 0;
+	ffs->raw_fs_hs_descs_length = 0;
+	ffs->raw_ss_descs_length = 0;
 	ffs->fs_descs_count = 0;
 	ffs->hs_descs_count = 0;
+	ffs->ss_descs_count = 0;
 
 	ffs->strings_count = 0;
 	ffs->interfaces_count = 0;
@@ -1596,7 +1597,24 @@ static int ffs_func_eps_enable(struct ffs_function *func)
 	spin_lock_irqsave(&func->ffs->eps_lock, flags);
 	do {
 		struct usb_endpoint_descriptor *ds;
-		ds = ep->descs[ep->descs[1] ? 1 : 0];
+		int desc_idx;
+
+		if (ffs->gadget->speed == USB_SPEED_SUPER)
+			desc_idx = 2;
+		else if (ffs->gadget->speed == USB_SPEED_HIGH)
+			desc_idx = 1;
+		else
+			desc_idx = 0;
+
+		/* fall-back to lower speed if desc missing for current speed */
+		do {
+			ds = ep->descs[desc_idx];
+		} while (!ds && --desc_idx >= 0);
+
+		if (!ds) {
+			ret = -EINVAL;
+			break;
+		}
 
 		ep->ep->driver_data = ep;
 		ep->ep->desc = ds;
@@ -1731,6 +1749,12 @@ static int __must_check ffs_do_desc(char *data, unsigned len,
 	}
 		break;
 
+	case USB_DT_SS_ENDPOINT_COMP:
+		pr_vdebug("EP SS companion descriptor\n");
+		if (length != sizeof(struct usb_ss_ep_comp_descriptor))
+			goto inv_length;
+		break;
+
 	case USB_DT_OTHER_SPEED_CONFIG:
 	case USB_DT_INTERFACE_POWER:
 	case USB_DT_DEBUG:
@@ -1841,8 +1865,8 @@ static int __ffs_data_do_entity(enum ffs_entity_type type,
 static int __ffs_data_got_descs(struct ffs_data *ffs,
 				char *const _data, size_t len)
 {
-	unsigned fs_count, hs_count;
-	int fs_len, ret = -EINVAL;
+	unsigned fs_count, hs_count, ss_count = 0;
+	int fs_len, hs_len, ss_len, ret = -EINVAL;
 	char *data = _data;
 
 	ENTER();
@@ -1853,9 +1877,6 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 	fs_count = get_unaligned_le32(data +  8);
 	hs_count = get_unaligned_le32(data + 12);
 
-	if (!fs_count && !hs_count)
-		goto einval;
-
 	data += 16;
 	len  -= 16;
 
@@ -1874,22 +1895,54 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 	}
 
 	if (likely(hs_count)) {
-		ret = ffs_do_descs(hs_count, data, len,
+		hs_len = ffs_do_descs(hs_count, data, len,
 				   __ffs_data_do_entity, ffs);
-		if (unlikely(ret < 0))
+		if (unlikely(hs_len < 0)) {
+			ret = hs_len;
+			goto error;
+		}
+
+		data += hs_len;
+		len  -= hs_len;
+	} else {
+		hs_len = 0;
+	}
+
+	if (len >= 8) {
+		/* Check SS_MAGIC for presence of ss_descs and get SS_COUNT */
+		if (get_unaligned_le32(data) != FUNCTIONFS_SS_DESC_MAGIC)
+			goto einval;
+
+		ss_count = get_unaligned_le32(data + 4);
+		data += 8;
+		len  -= 8;
+	}
+
+	if (!fs_count && !hs_count && !ss_count)
+		goto einval;
+
+	if (ss_count) {
+		ss_len = ffs_do_descs(ss_count, data, len,
+				   __ffs_data_do_entity, ffs);
+		if (unlikely(ss_len < 0)) {
+			ret = ss_len;
 			goto error;
+		}
+		ret = ss_len;
 	} else {
+		ss_len = 0;
 		ret = 0;
 	}
 
 	if (unlikely(len != ret))
 		goto einval;
 
-	ffs->raw_fs_descs_length = fs_len;
-	ffs->raw_descs_length    = fs_len + ret;
-	ffs->raw_descs           = _data;
-	ffs->fs_descs_count      = fs_count;
-	ffs->hs_descs_count      = hs_count;
+	ffs->raw_fs_hs_descs_length	 = fs_len + hs_len;
+	ffs->raw_ss_descs_length	 = ss_len;
+	ffs->raw_descs			 = _data;
+	ffs->fs_descs_count		 = fs_count;
+	ffs->hs_descs_count		 = hs_count;
+	ffs->ss_descs_count		 = ss_count;
 
 	return 0;
 
@@ -2112,21 +2165,28 @@ static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
 	struct usb_endpoint_descriptor *ds = (void *)desc;
 	struct ffs_function *func = priv;
 	struct ffs_ep *ffs_ep;
-
-	/*
-	 * If hs_descriptors is not NULL then we are reading hs
-	 * descriptors now
-	 */
-	const int isHS = func->function.hs_descriptors != NULL;
-	unsigned idx;
+	unsigned ep_desc_id, idx;
+	static const char *speed_names[] = { "full", "high", "super" };
 
 	if (type != FFS_DESCRIPTOR)
 		return 0;
 
-	if (isHS)
+	/*
+	 * If ss_descriptors is not NULL, we are reading super speed
+	 * descriptors; if hs_descriptors is not NULL, we are reading high
+	 * speed descriptors; otherwise, we are reading full speed
+	 * descriptors.
+	 */
+	if (func->function.ss_descriptors) {
+		ep_desc_id = 2;
+		func->function.ss_descriptors[(long)valuep] = desc;
+	} else if (func->function.hs_descriptors) {
+		ep_desc_id = 1;
 		func->function.hs_descriptors[(long)valuep] = desc;
-	else
+	} else {
+		ep_desc_id = 0;
 		func->function.fs_descriptors[(long)valuep]    = desc;
+	}
 
 	if (!desc || desc->bDescriptorType != USB_DT_ENDPOINT)
 		return 0;
@@ -2134,13 +2194,13 @@ static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
 	idx = (ds->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK) - 1;
 	ffs_ep = func->eps + idx;
 
-	if (unlikely(ffs_ep->descs[isHS])) {
-		pr_vdebug("two %sspeed descriptors for EP %d\n",
-			  isHS ? "high" : "full",
+	if (unlikely(ffs_ep->descs[ep_desc_id])) {
+		pr_err("two %sspeed descriptors for EP %d\n",
+			  speed_names[ep_desc_id],
 			  ds->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
 		return -EINVAL;
 	}
-	ffs_ep->descs[isHS] = ds;
+	ffs_ep->descs[ep_desc_id] = ds;
 
 	ffs_dump_mem(": Original  ep desc", ds, ds->bLength);
 	if (ffs_ep->ep) {
@@ -2284,8 +2344,10 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	const int full = !!func->ffs->fs_descs_count;
 	const int high = gadget_is_dualspeed(func->gadget) &&
 		func->ffs->hs_descs_count;
+	const int super = gadget_is_superspeed(func->gadget) &&
+		func->ffs->ss_descs_count;
 
-	int ret;
+	int fs_len, hs_len, ret;
 
 	/* Make it a single chunk, less management later on */
 	vla_group(d);
@@ -2294,15 +2356,17 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		full ? ffs->fs_descs_count + 1 : 0);
 	vla_item_with_sz(d, struct usb_descriptor_header *, hs_descs,
 		high ? ffs->hs_descs_count + 1 : 0);
+	vla_item_with_sz(d, struct usb_descriptor_header *, ss_descs,
+		super ? ffs->ss_descs_count + 1 : 0);
 	vla_item_with_sz(d, short, inums, ffs->interfaces_count);
 	vla_item_with_sz(d, char, raw_descs,
-		high ? ffs->raw_descs_length : ffs->raw_fs_descs_length);
+			ffs->raw_fs_hs_descs_length + ffs->raw_ss_descs_length);
 	char *vlabuf;
 
 	ENTER();
 
-	/* Only high speed but not supported by gadget? */
-	if (unlikely(!(full | high)))
+	/* Has descriptors only for speeds gadget does not support */
+	if (unlikely(!(full | high | super)))
 		return -ENOTSUPP;
 
 	/* Allocate a single chunk, less management later on */
@@ -2312,8 +2376,16 @@ static int _ffs_func_bind(struct usb_configuration *c,
 
 	/* Zero */
 	memset(vla_ptr(vlabuf, d, eps), 0, d_eps__sz);
+	/* Copy only raw (hs,fs) descriptors (until ss_magic and ss_count) */
 	memcpy(vla_ptr(vlabuf, d, raw_descs), ffs->raw_descs + 16,
-	       d_raw_descs__sz);
+		ffs->raw_fs_hs_descs_length);
+	/* Copy SS descs present @ header + hs_fs_descs + ss_magic + ss_count */
+	if (func->ffs->ss_descs_count)
+		memcpy(vla_ptr(vlabuf, d, raw_descs) +
+				ffs->raw_fs_hs_descs_length,
+		       ffs->raw_descs + 16 + ffs->raw_fs_hs_descs_length + 8,
+		       ffs->raw_ss_descs_length);
+
 	memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz);
 	for (ret = ffs->eps_count; ret; --ret) {
 		struct ffs_ep *ptr;
@@ -2335,22 +2407,38 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	 */
 	if (likely(full)) {
 		func->function.fs_descriptors = vla_ptr(vlabuf, d, fs_descs);
-		ret = ffs_do_descs(ffs->fs_descs_count,
-				   vla_ptr(vlabuf, d, raw_descs),
-				   d_raw_descs__sz,
-				   __ffs_func_bind_do_descs, func);
-		if (unlikely(ret < 0))
+		fs_len = ffs_do_descs(ffs->fs_descs_count,
+				      vla_ptr(vlabuf, d, raw_descs),
+				      d_raw_descs__sz,
+				      __ffs_func_bind_do_descs, func);
+		if (unlikely(fs_len < 0)) {
+			ret = fs_len;
 			goto error;
+		}
 	} else {
-		ret = 0;
+		fs_len = 0;
 	}
 
 	if (likely(high)) {
 		func->function.hs_descriptors = vla_ptr(vlabuf, d, hs_descs);
-		ret = ffs_do_descs(ffs->hs_descs_count,
-				   vla_ptr(vlabuf, d, raw_descs) + ret,
-				   d_raw_descs__sz - ret,
-				   __ffs_func_bind_do_descs, func);
+		hs_len = ffs_do_descs(ffs->hs_descs_count,
+				      vla_ptr(vlabuf, d, raw_descs) + fs_len,
+				      d_raw_descs__sz - fs_len,
+				      __ffs_func_bind_do_descs, func);
+		if (unlikely(hs_len < 0)) {
+			ret = hs_len;
+			goto error;
+		}
+	} else {
+		hs_len = 0;
+	}
+
+	if (likely(super)) {
+		func->function.ss_descriptors = vla_ptr(vlabuf, d, ss_descs);
+		ret = ffs_do_descs(ffs->ss_descs_count,
+				vla_ptr(vlabuf, d, raw_descs) + fs_len + hs_len,
+				d_raw_descs__sz - fs_len - hs_len,
+				__ffs_func_bind_do_descs, func);
 		if (unlikely(ret < 0))
 			goto error;
 	}
@@ -2361,7 +2449,8 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	 * now.
 	 */
 	ret = ffs_do_descs(ffs->fs_descs_count +
-			   (high ? ffs->hs_descs_count : 0),
+			   (high ? ffs->hs_descs_count : 0) +
+			   (super ? ffs->ss_descs_count : 0),
 			   vla_ptr(vlabuf, d, raw_descs), d_raw_descs__sz,
 			   __ffs_func_bind_do_nums, func);
 	if (unlikely(ret < 0))
@@ -2708,6 +2797,7 @@ static void ffs_func_unbind(struct usb_configuration *c,
 	 */
 	func->function.fs_descriptors = NULL;
 	func->function.hs_descriptors = NULL;
+	func->function.ss_descriptors = NULL;
 	func->interfaces_nums = NULL;
 
 	ffs_event_add(ffs, FUNCTIONFS_UNBIND);
diff --git a/drivers/usb/gadget/u_fs.h b/drivers/usb/gadget/u_fs.h
index c39e805025b9..0deb6d5f7c35 100644
--- a/drivers/usb/gadget/u_fs.h
+++ b/drivers/usb/gadget/u_fs.h
@@ -208,14 +208,16 @@ struct ffs_data {
 	/*
 	 * Real descriptors are 16 bytes after raw_descs (so you need
 	 * to skip 16 bytes (ie. ffs->raw_descs + 16) to get to the
-	 * first full speed descriptor).  raw_descs_length and
-	 * raw_fs_descs_length do not have those 16 bytes added.
+	 * first full speed descriptor).
+	 * raw_fs_hs_descs_length does not have those 16 bytes added.
+	 * ss_descs are 8 bytes (ss_magic + count) pass the hs_descs
 	 */
 	const void			*raw_descs;
-	unsigned			raw_descs_length;
-	unsigned			raw_fs_descs_length;
+	unsigned			raw_fs_hs_descs_length;
+	unsigned			raw_ss_descs_length;
 	unsigned			fs_descs_count;
 	unsigned			hs_descs_count;
+	unsigned			ss_descs_count;
 
 	unsigned short			strings_count;
 	unsigned short			interfaces_count;
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index d6b01283f85c..0f8f7be5b0d3 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -13,6 +13,7 @@ enum {
 	FUNCTIONFS_STRINGS_MAGIC     = 2
 };
 
+#define FUNCTIONFS_SS_DESC_MAGIC 0x0055DE5C
 
 #ifndef __KERNEL__
 
@@ -50,7 +51,11 @@ struct usb_functionfs_descs_head {
  * |  12 | hs_count  | LE32         | number of high-speed descriptors     |
  * |  16 | fs_descrs | Descriptor[] | list of full-speed descriptors       |
  * |     | hs_descrs | Descriptor[] | list of high-speed descriptors       |
+ * |     | ss_magic  | LE32         | FUNCTIONFS_SS_DESC_MAGIC             |
+ * |     | ss_count  | LE32         | number of super-speed descriptors    |
+ * |     | ss_descrs | Descriptor[] | list of super-speed descriptors      |
  *
+ * ss_magic: if present then it implies that SS_DESCs are also present
  * descs are just valid USB descriptors and have the following format:
  *
  * | off | name            | type | description              |
-- 
cgit v1.2.3


From ac8dde11f2b397fe2282f585d5eb427a13675ea2 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Fri, 28 Feb 2014 16:50:23 +0530
Subject: usb: gadget: f_fs: Add flags to descriptors block

This reworks the way SuperSpeed descriptors are added: instead of having
a magic after the full and high speed descriptors, the whole descriptors
block now includes a flags field which lists which descriptors are
present and makes future extensions possible.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_fs.c           | 132 +++++++++++++++---------------------
 drivers/usb/gadget/u_fs.h           |  12 ++--
 include/uapi/linux/usb/functionfs.h |  49 +++++++------
 3 files changed, 91 insertions(+), 102 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index 66f60b9a34a2..42f7a0e4be59 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -1434,7 +1434,7 @@ static void ffs_data_clear(struct ffs_data *ffs)
 	if (ffs->epfiles)
 		ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
 
-	kfree(ffs->raw_descs);
+	kfree(ffs->raw_descs_data);
 	kfree(ffs->raw_strings);
 	kfree(ffs->stringtabs);
 }
@@ -1446,12 +1446,12 @@ static void ffs_data_reset(struct ffs_data *ffs)
 	ffs_data_clear(ffs);
 
 	ffs->epfiles = NULL;
+	ffs->raw_descs_data = NULL;
 	ffs->raw_descs = NULL;
 	ffs->raw_strings = NULL;
 	ffs->stringtabs = NULL;
 
-	ffs->raw_fs_hs_descs_length = 0;
-	ffs->raw_ss_descs_length = 0;
+	ffs->raw_descs_length = 0;
 	ffs->fs_descs_count = 0;
 	ffs->hs_descs_count = 0;
 	ffs->ss_descs_count = 0;
@@ -1865,89 +1865,76 @@ static int __ffs_data_do_entity(enum ffs_entity_type type,
 static int __ffs_data_got_descs(struct ffs_data *ffs,
 				char *const _data, size_t len)
 {
-	unsigned fs_count, hs_count, ss_count = 0;
-	int fs_len, hs_len, ss_len, ret = -EINVAL;
-	char *data = _data;
+	char *data = _data, *raw_descs;
+	unsigned counts[3], flags;
+	int ret = -EINVAL, i;
 
 	ENTER();
 
-	if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_DESCRIPTORS_MAGIC ||
-		     get_unaligned_le32(data + 4) != len))
+	if (get_unaligned_le32(data + 4) != len)
 		goto error;
-	fs_count = get_unaligned_le32(data +  8);
-	hs_count = get_unaligned_le32(data + 12);
-
-	data += 16;
-	len  -= 16;
 
-	if (likely(fs_count)) {
-		fs_len = ffs_do_descs(fs_count, data, len,
-				      __ffs_data_do_entity, ffs);
-		if (unlikely(fs_len < 0)) {
-			ret = fs_len;
+	switch (get_unaligned_le32(data)) {
+	case FUNCTIONFS_DESCRIPTORS_MAGIC:
+		flags = FUNCTIONFS_HAS_FS_DESC | FUNCTIONFS_HAS_HS_DESC;
+		data += 8;
+		len  -= 8;
+		break;
+	case FUNCTIONFS_DESCRIPTORS_MAGIC_V2:
+		flags = get_unaligned_le32(data + 8);
+		if (flags & ~(FUNCTIONFS_HAS_FS_DESC |
+			      FUNCTIONFS_HAS_HS_DESC |
+			      FUNCTIONFS_HAS_SS_DESC)) {
+			ret = -ENOSYS;
 			goto error;
 		}
-
-		data += fs_len;
-		len  -= fs_len;
-	} else {
-		fs_len = 0;
+		data += 12;
+		len  -= 12;
+		break;
+	default:
+		goto error;
 	}
 
-	if (likely(hs_count)) {
-		hs_len = ffs_do_descs(hs_count, data, len,
-				   __ffs_data_do_entity, ffs);
-		if (unlikely(hs_len < 0)) {
-			ret = hs_len;
+	/* Read fs_count, hs_count and ss_count (if present) */
+	for (i = 0; i < 3; ++i) {
+		if (!(flags & (1 << i))) {
+			counts[i] = 0;
+		} else if (len < 4) {
 			goto error;
+		} else {
+			counts[i] = get_unaligned_le32(data);
+			data += 4;
+			len  -= 4;
 		}
-
-		data += hs_len;
-		len  -= hs_len;
-	} else {
-		hs_len = 0;
-	}
-
-	if (len >= 8) {
-		/* Check SS_MAGIC for presence of ss_descs and get SS_COUNT */
-		if (get_unaligned_le32(data) != FUNCTIONFS_SS_DESC_MAGIC)
-			goto einval;
-
-		ss_count = get_unaligned_le32(data + 4);
-		data += 8;
-		len  -= 8;
 	}
 
-	if (!fs_count && !hs_count && !ss_count)
-		goto einval;
-
-	if (ss_count) {
-		ss_len = ffs_do_descs(ss_count, data, len,
+	/* Read descriptors */
+	raw_descs = data;
+	for (i = 0; i < 3; ++i) {
+		if (!counts[i])
+			continue;
+		ret = ffs_do_descs(counts[i], data, len,
 				   __ffs_data_do_entity, ffs);
-		if (unlikely(ss_len < 0)) {
-			ret = ss_len;
+		if (ret < 0)
 			goto error;
-		}
-		ret = ss_len;
-	} else {
-		ss_len = 0;
-		ret = 0;
+		data += ret;
+		len  -= ret;
 	}
 
-	if (unlikely(len != ret))
-		goto einval;
+	if (raw_descs == data || len) {
+		ret = -EINVAL;
+		goto error;
+	}
 
-	ffs->raw_fs_hs_descs_length	 = fs_len + hs_len;
-	ffs->raw_ss_descs_length	 = ss_len;
-	ffs->raw_descs			 = _data;
-	ffs->fs_descs_count		 = fs_count;
-	ffs->hs_descs_count		 = hs_count;
-	ffs->ss_descs_count		 = ss_count;
+	ffs->raw_descs_data	= _data;
+	ffs->raw_descs		= raw_descs;
+	ffs->raw_descs_length	= data - raw_descs;
+	ffs->fs_descs_count	= counts[0];
+	ffs->hs_descs_count	= counts[1];
+	ffs->ss_descs_count	= counts[2];
 
 	return 0;
 
-einval:
-	ret = -EINVAL;
 error:
 	kfree(_data);
 	return ret;
@@ -2359,8 +2346,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	vla_item_with_sz(d, struct usb_descriptor_header *, ss_descs,
 		super ? ffs->ss_descs_count + 1 : 0);
 	vla_item_with_sz(d, short, inums, ffs->interfaces_count);
-	vla_item_with_sz(d, char, raw_descs,
-			ffs->raw_fs_hs_descs_length + ffs->raw_ss_descs_length);
+	vla_item_with_sz(d, char, raw_descs, ffs->raw_descs_length);
 	char *vlabuf;
 
 	ENTER();
@@ -2376,15 +2362,9 @@ static int _ffs_func_bind(struct usb_configuration *c,
 
 	/* Zero */
 	memset(vla_ptr(vlabuf, d, eps), 0, d_eps__sz);
-	/* Copy only raw (hs,fs) descriptors (until ss_magic and ss_count) */
-	memcpy(vla_ptr(vlabuf, d, raw_descs), ffs->raw_descs + 16,
-		ffs->raw_fs_hs_descs_length);
-	/* Copy SS descs present @ header + hs_fs_descs + ss_magic + ss_count */
-	if (func->ffs->ss_descs_count)
-		memcpy(vla_ptr(vlabuf, d, raw_descs) +
-				ffs->raw_fs_hs_descs_length,
-		       ffs->raw_descs + 16 + ffs->raw_fs_hs_descs_length + 8,
-		       ffs->raw_ss_descs_length);
+	/* Copy descriptors  */
+	memcpy(vla_ptr(vlabuf, d, raw_descs), ffs->raw_descs,
+	       ffs->raw_descs_length);
 
 	memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz);
 	for (ret = ffs->eps_count; ret; --ret) {
diff --git a/drivers/usb/gadget/u_fs.h b/drivers/usb/gadget/u_fs.h
index 0deb6d5f7c35..bf0ba375d459 100644
--- a/drivers/usb/gadget/u_fs.h
+++ b/drivers/usb/gadget/u_fs.h
@@ -206,15 +206,13 @@ struct ffs_data {
 
 	/* filled by __ffs_data_got_descs() */
 	/*
-	 * Real descriptors are 16 bytes after raw_descs (so you need
-	 * to skip 16 bytes (ie. ffs->raw_descs + 16) to get to the
-	 * first full speed descriptor).
-	 * raw_fs_hs_descs_length does not have those 16 bytes added.
-	 * ss_descs are 8 bytes (ss_magic + count) pass the hs_descs
+	 * raw_descs is what you kfree, real_descs points inside of raw_descs,
+	 * where full speed, high speed and super speed descriptors start.
+	 * real_descs_length is the length of all those descriptors.
 	 */
+	const void			*raw_descs_data;
 	const void			*raw_descs;
-	unsigned			raw_fs_hs_descs_length;
-	unsigned			raw_ss_descs_length;
+	unsigned			raw_descs_length;
 	unsigned			fs_descs_count;
 	unsigned			hs_descs_count;
 	unsigned			ss_descs_count;
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 0f8f7be5b0d3..2a4b4a72a4f9 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -10,10 +10,15 @@
 
 enum {
 	FUNCTIONFS_DESCRIPTORS_MAGIC = 1,
-	FUNCTIONFS_STRINGS_MAGIC     = 2
+	FUNCTIONFS_STRINGS_MAGIC = 2,
+	FUNCTIONFS_DESCRIPTORS_MAGIC_V2 = 3,
 };
 
-#define FUNCTIONFS_SS_DESC_MAGIC 0x0055DE5C
+enum functionfs_flags {
+	FUNCTIONFS_HAS_FS_DESC = 1,
+	FUNCTIONFS_HAS_HS_DESC = 2,
+	FUNCTIONFS_HAS_SS_DESC = 4,
+};
 
 #ifndef __KERNEL__
 
@@ -29,34 +34,40 @@ struct usb_endpoint_descriptor_no_audio {
 } __attribute__((packed));
 
 
-/*
- * All numbers must be in little endian order.
- */
-
-struct usb_functionfs_descs_head {
-	__le32 magic;
-	__le32 length;
-	__le32 fs_count;
-	__le32 hs_count;
-} __attribute__((packed));
-
 /*
  * Descriptors format:
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
- * |   0 | magic     | LE32         | FUNCTIONFS_{FS,HS}_DESCRIPTORS_MAGIC |
+ * |   0 | magic     | LE32         | FUNCTIONFS_DESCRIPTORS_MAGIC_V2      |
+ * |   4 | length    | LE32         | length of the whole data chunk       |
+ * |   8 | flags     | LE32         | combination of functionfs_flags      |
+ * |     | fs_count  | LE32         | number of full-speed descriptors     |
+ * |     | hs_count  | LE32         | number of high-speed descriptors     |
+ * |     | ss_count  | LE32         | number of super-speed descriptors    |
+ * |     | fs_descrs | Descriptor[] | list of full-speed descriptors       |
+ * |     | hs_descrs | Descriptor[] | list of high-speed descriptors       |
+ * |     | ss_descrs | Descriptor[] | list of super-speed descriptors      |
+ *
+ * Depending on which flags are set, various fields may be missing in the
+ * structure.  Any flags that are not recognised cause the whole block to be
+ * rejected with -ENOSYS.
+ *
+ * Legacy descriptors format:
+ *
+ * | off | name      | type         | description                          |
+ * |-----+-----------+--------------+--------------------------------------|
+ * |   0 | magic     | LE32         | FUNCTIONFS_DESCRIPTORS_MAGIC         |
  * |   4 | length    | LE32         | length of the whole data chunk       |
  * |   8 | fs_count  | LE32         | number of full-speed descriptors     |
  * |  12 | hs_count  | LE32         | number of high-speed descriptors     |
  * |  16 | fs_descrs | Descriptor[] | list of full-speed descriptors       |
  * |     | hs_descrs | Descriptor[] | list of high-speed descriptors       |
- * |     | ss_magic  | LE32         | FUNCTIONFS_SS_DESC_MAGIC             |
- * |     | ss_count  | LE32         | number of super-speed descriptors    |
- * |     | ss_descrs | Descriptor[] | list of super-speed descriptors      |
  *
- * ss_magic: if present then it implies that SS_DESCs are also present
- * descs are just valid USB descriptors and have the following format:
+ * All numbers must be in little endian order.
+ *
+ * Descriptor[] is an array of valid USB descriptors which have the following
+ * format:
  *
  * | off | name            | type | description              |
  * |-----+-----------------+------+--------------------------|
-- 
cgit v1.2.3


From 3b02b56cd5988d569731f6c0c26992296e46b758 Mon Sep 17 00:00:00 2001
From: Vytas Dauksa <vytas.dauksa@smoothwall.net>
Date: Tue, 17 Dec 2013 14:01:43 +0000
Subject: netfilter: ipset: add hash:ip,mark data type to ipset

Introduce packet mark support with new ip,mark hash set. This includes
userspace and kernelspace code, hash:ip,mark set tests and man page
updates.

The intended use of ip,mark set is similar to the ip:port type, but for
protocols which don't use a predictable port number. Instead of port
number it matches a firewall mark determined by a layer 7 filtering
program like opendpi.

As well as allowing or blocking traffic it will also be used for
accounting packets and bytes sent for each protocol.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |  10 +-
 include/uapi/linux/netfilter/ipset/ip_set.h |   1 +
 net/netfilter/ipset/Kconfig                 |   9 +
 net/netfilter/ipset/Makefile                |   1 +
 net/netfilter/ipset/ip_set_hash_ipmark.c    | 312 ++++++++++++++++++++++++++++
 5 files changed, 329 insertions(+), 4 deletions(-)
 create mode 100644 net/netfilter/ipset/ip_set_hash_ipmark.c

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0c7d01eae56c..4ac00d4aa87e 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -39,11 +39,13 @@ enum ip_set_feature {
 	IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
 	IPSET_TYPE_IFACE_FLAG = 5,
 	IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
-	IPSET_TYPE_NOMATCH_FLAG = 6,
+	IPSET_TYPE_MARK_FLAG = 6,
+	IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
+	IPSET_TYPE_NOMATCH_FLAG = 7,
 	IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
 	/* Strictly speaking not a feature, but a flag for dumping:
 	 * this settype must be dumped last */
-	IPSET_DUMP_LAST_FLAG = 7,
+	IPSET_DUMP_LAST_FLAG = 8,
 	IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
 };
 
@@ -171,8 +173,6 @@ struct ip_set_type {
 	char name[IPSET_MAXNAMELEN];
 	/* Protocol version */
 	u8 protocol;
-	/* Set features to control swapping */
-	u8 features;
 	/* Set type dimension */
 	u8 dimension;
 	/*
@@ -182,6 +182,8 @@ struct ip_set_type {
 	u8 family;
 	/* Type revisions */
 	u8 revision_min, revision_max;
+	/* Set features to control swapping */
+	u16 features;
 
 	/* Create set */
 	int (*create)(struct net *net, struct ip_set *set,
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 25d3b2f79c02..5368f8275774 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -82,6 +82,7 @@ enum {
 	IPSET_ATTR_PROTO,	/* 7 */
 	IPSET_ATTR_CADT_FLAGS,	/* 8 */
 	IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO,	/* 9 */
+	IPSET_ATTR_MARK,	/* 10 */
 	/* Reserve empty slots */
 	IPSET_ATTR_CADT_MAX = 16,
 	/* Create-only specific attributes */
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 44cd4f58adf0..2f7f5c32c6f9 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -61,6 +61,15 @@ config IP_SET_HASH_IP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_SET_HASH_IPMARK
+	tristate "hash:ip,mark set support"
+	depends on IP_SET
+	help
+	  This option adds the hash:ip,mark set type support, by which one
+	  can store IPv4/IPv6 address and mark pairs.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_SET_HASH_IPPORT
 	tristate "hash:ip,port set support"
 	depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 44b2d38476fa..231f10196cb9 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
 
 # hash types
 obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
 obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
 obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
 obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
new file mode 100644
index 000000000000..e56c0d916fac
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -0,0 +1,312 @@
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ * Copyright (C) 2013 Smoothwall Ltd. <vytas.dauksa@smoothwall.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mark type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN	0
+#define IPSET_TYPE_REV_MAX	0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
+IP_SET_MODULE_DESC("hash:ip,mark", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mark");
+
+/* Type specific function prefix */
+#define HTYPE		hash_ipmark
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmark4_elem {
+	__be32 ip;
+	__u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
+			const struct hash_ipmark4_elem *ip2,
+			u32 *multi)
+{
+	return ip1->ip == ip2->ip &&
+	       ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark4_data_list(struct sk_buff *skb,
+		       const struct hash_ipmark4_elem *data)
+{
+	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
+	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
+		       const struct hash_ipmark4_elem *d)
+{
+	next->ip = d->ip;
+}
+
+#define MTYPE           hash_ipmark4
+#define PF              4
+#define HOST_MASK       32
+#define HKEY_DATALEN	sizeof(struct hash_ipmark4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.mark = skb->mark;
+
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	const struct hash_ipmark *h = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	u32 ip, ip_to = 0;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+
+	if (adt == IPSET_TEST ||
+	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ip_to = ip = ntohl(e.ip);
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to)
+			swap(ip, ip_to);
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (!cidr || cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip_set_mask_from_to(ip, ip_to, cidr);
+	}
+
+	if (retried)
+		ip = ntohl(h->next.ip);
+	for (; !before(ip_to, ip); ip++) {
+		e.ip = htonl(ip);
+		ret = adtfn(set, &e, &ext, &ext, flags);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* IPv6 variant */
+
+struct hash_ipmark6_elem {
+	union nf_inet_addr ip;
+	__u32 mark;
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
+			const struct hash_ipmark6_elem *ip2,
+			u32 *multi)
+{
+	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) &&
+	       ip1->mark == ip2->mark;
+}
+
+static bool
+hash_ipmark6_data_list(struct sk_buff *skb,
+		       const struct hash_ipmark6_elem *data)
+{
+	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
+	    nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return 1;
+}
+
+static inline void
+hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+		       const struct hash_ipmark6_elem *d)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE		hash_ipmark6
+#define PF		6
+#define HOST_MASK	128
+#define HKEY_DATALEN	sizeof(struct hash_ipmark6_elem)
+#define	IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+
+static int
+hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+	e.mark = skb->mark;
+
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ipmark6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+		     tb[IPSET_ATTR_IP_TO] ||
+		     tb[IPSET_ATTR_CIDR]))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+
+	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+
+	if (adt == IPSET_TEST) {
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_eexist(ret, flags) ? 0 : ret;
+	}
+
+	ret = adtfn(set, &e, &ext, &ext, flags);
+	if (ret && !ip_set_eexist(ret, flags))
+		return ret;
+	else
+		ret = 0;
+
+	return ret;
+}
+
+static struct ip_set_type hash_ipmark_type __read_mostly = {
+	.name		= "hash:ip,mark",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_MARK,
+	.dimension	= IPSET_DIM_TWO,
+	.family		= NFPROTO_UNSPEC,
+	.revision_min	= IPSET_TYPE_REV_MIN,
+	.revision_max	= IPSET_TYPE_REV_MAX,
+	.create		= hash_ipmark_create,
+	.create_policy	= {
+		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
+		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
+		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
+		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_MARK]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
+		[IPSET_ATTR_COMMENT]	= { .type = NLA_NUL_STRING },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+hash_ipmark_init(void)
+{
+	return ip_set_type_register(&hash_ipmark_type);
+}
+
+static void __exit
+hash_ipmark_fini(void)
+{
+	ip_set_type_unregister(&hash_ipmark_type);
+}
+
+module_init(hash_ipmark_init);
+module_exit(hash_ipmark_fini);
-- 
cgit v1.2.3


From 4d0e5c076d01d3fb4767a502a9517923fb9a080e Mon Sep 17 00:00:00 2001
From: Vytas Dauksa <vytas.dauksa@smoothwall.net>
Date: Tue, 17 Dec 2013 14:01:44 +0000
Subject: netfilter: ipset: add markmask for hash:ip,mark data type

Introduce a packet mark mask for the hash:ip,mark data type. This allows
a mark bit filter to be set for the ip set.

Change-Id: Id8dd9ca7e64477c4f7b022a1d9c1a5b187f1c96e

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/uapi/linux/netfilter/ipset/ip_set.h |  2 ++
 net/netfilter/ipset/ip_set_hash_gen.h       | 31 +++++++++++++++++++++++++++++
 net/netfilter/ipset/ip_set_hash_ipmark.c    |  9 +++++++++
 3 files changed, 42 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 5368f8275774..f636f282b142 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -89,6 +89,7 @@ enum {
 	IPSET_ATTR_GC,
 	IPSET_ATTR_HASHSIZE,
 	IPSET_ATTR_MAXELEM,
+	IPSET_ATTR_MARKMASK,
 	IPSET_ATTR_NETMASK,
 	IPSET_ATTR_PROBES,
 	IPSET_ATTR_RESIZE,
@@ -138,6 +139,7 @@ enum ipset_errno {
 	IPSET_ERR_EXIST,
 	IPSET_ERR_INVALID_CIDR,
 	IPSET_ERR_INVALID_NETMASK,
+	IPSET_ERR_INVALID_MARKMASK,
 	IPSET_ERR_INVALID_FAMILY,
 	IPSET_ERR_TIMEOUT,
 	IPSET_ERR_REFERENCED,
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index be6932ad3a86..b1eed81e24c5 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -263,6 +263,9 @@ struct htype {
 	u32 maxelem;		/* max elements in the hash */
 	u32 elements;		/* current element (vs timeout) */
 	u32 initval;		/* random jhash init value */
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	u32 markmask;		/* markmask value for mark mask to store */
+#endif
 	struct timer_list gc;	/* garbage collection when timeout enabled */
 	struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
@@ -453,6 +456,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 	       a->timeout == b->timeout &&
 #ifdef IP_SET_HASH_WITH_NETMASK
 	       x->netmask == y->netmask &&
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	       x->markmask == y->markmask &&
 #endif
 	       a->extensions == b->extensions;
 }
@@ -907,6 +913,10 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	if (h->netmask != HOST_MASK &&
 	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
 		goto nla_put_failure;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
+		goto nla_put_failure;
 #endif
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
@@ -1016,6 +1026,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 			    struct nlattr *tb[], u32 flags)
 {
 	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	u32 markmask;
+#endif
 	u8 hbits;
 #ifdef IP_SET_HASH_WITH_NETMASK
 	u8 netmask;
@@ -1026,6 +1039,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 
 	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
 		return -IPSET_ERR_INVALID_FAMILY;
+
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	markmask = 0xffffffff;
+#endif
 #ifdef IP_SET_HASH_WITH_NETMASK
 	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
 	pr_debug("Create set %s with family %s\n",
@@ -1034,6 +1051,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 
 	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
+#ifdef IP_SET_HASH_WITH_MARKMASK
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK) ||
+#endif
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
 		return -IPSET_ERR_PROTOCOL;
@@ -1057,6 +1077,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 			return -IPSET_ERR_INVALID_NETMASK;
 	}
 #endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	if (tb[IPSET_ATTR_MARKMASK]) {
+		markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
+
+		if ((markmask > 4294967295u) || markmask == 0)
+			return -IPSET_ERR_INVALID_MARKMASK;
+	}
+#endif
 
 	hsize = sizeof(*h);
 #ifdef IP_SET_HASH_WITH_NETS
@@ -1070,6 +1098,9 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	h->maxelem = maxelem;
 #ifdef IP_SET_HASH_WITH_NETMASK
 	h->netmask = netmask;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+	h->markmask = markmask;
 #endif
 	get_random_bytes(&h->initval, sizeof(h->initval));
 	set->timeout = IPSET_NO_TIMEOUT;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index e56c0d916fac..1bf8e8524218 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -34,6 +34,7 @@ MODULE_ALIAS("ip_set_hash:ip,mark");
 
 /* Type specific function prefix */
 #define HTYPE		hash_ipmark
+#define IP_SET_HASH_WITH_MARKMASK
 
 /* IPv4 variant */
 
@@ -85,11 +86,13 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
+	const struct hash_ipmark *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipmark4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	e.mark = skb->mark;
+	e.mark &= h->markmask;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -122,6 +125,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 		return ret;
 
 	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+	e.mark &= h->markmask;
 
 	if (adt == IPSET_TEST ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -213,11 +217,13 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
 		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
+	const struct hash_ipmark *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipmark6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 
 	e.mark = skb->mark;
+	e.mark &= h->markmask;
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -227,6 +233,7 @@ static int
 hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
+	const struct hash_ipmark *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipmark6_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -250,6 +257,7 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
 		return ret;
 
 	e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+	e.mark &= h->markmask;
 
 	if (adt == IPSET_TEST) {
 		ret = adtfn(set, &e, &ext, &ext, flags);
@@ -275,6 +283,7 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
 	.revision_max	= IPSET_TYPE_REV_MAX,
 	.create		= hash_ipmark_create,
 	.create_policy	= {
+		[IPSET_ATTR_MARKMASK]	= { .type = NLA_U32 },
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
 		[IPSET_ATTR_MAXELEM]	= { .type = NLA_U32 },
 		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },
-- 
cgit v1.2.3


From af284ece87365f3a69723f5bcc1bcdb505b5eb5d Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 13 Feb 2014 12:19:56 +0100
Subject: netfilter: ipset: Prepare the kernel for create option flags when no
 extension is needed

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 2 ++
 include/uapi/linux/netfilter/ipset/ip_set.h | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4ac00d4aa87e..f476bcec25ea 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -219,6 +219,8 @@ struct ip_set {
 	u8 revision;
 	/* Extensions */
 	u8 extensions;
+	/* Create flags */
+	u8 flags;
 	/* Default timeout value, if enabled */
 	u32 timeout;
 	/* Element data size */
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index f636f282b142..a29a378701d2 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -188,6 +188,12 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
+/* The flag bits which correspond to the non-extension create flags */
+enum ipset_create_flags {
+	IPSET_CREATE_FLAG_NONE = 0,
+	IPSET_CREATE_FLAG_MAX = 7,
+};
+
 /* Commands with settype-specific attributes */
 enum ipset_adt {
 	IPSET_ADD,
-- 
cgit v1.2.3


From 004088768b78f69002f03a341597217eb608fb2c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Feb 2014 12:40:59 +0100
Subject: netfilter: ipset: kernel: uapi: fix MARKMASK attr ABI breakage

commit 2dfb973c0dcc6d2211 (add markmask for hash:ip,mark data type)
inserted IPSET_ATTR_MARKMASK in between other enum values, thereby
changing the values of all subsequent attributes.  This causes 'ipset list'
to segfault on existing kernels since ipset no longer finds
IPSET_ATTR_MEMSIZE (it has a different value on kernel side).

Jozsef points out it should be moved below IPSET_ATTR_MARK which
works since there is some extra reserved space after that value.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/uapi/linux/netfilter/ipset/ip_set.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index a29a378701d2..a1ca24408206 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -83,13 +83,13 @@ enum {
 	IPSET_ATTR_CADT_FLAGS,	/* 8 */
 	IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO,	/* 9 */
 	IPSET_ATTR_MARK,	/* 10 */
+	IPSET_ATTR_MARKMASK,	/* 11 */
 	/* Reserve empty slots */
 	IPSET_ATTR_CADT_MAX = 16,
 	/* Create-only specific attributes */
 	IPSET_ATTR_GC,
 	IPSET_ATTR_HASHSIZE,
 	IPSET_ATTR_MAXELEM,
-	IPSET_ATTR_MARKMASK,
 	IPSET_ATTR_NETMASK,
 	IPSET_ATTR_PROBES,
 	IPSET_ATTR_RESIZE,
@@ -139,7 +139,6 @@ enum ipset_errno {
 	IPSET_ERR_EXIST,
 	IPSET_ERR_INVALID_CIDR,
 	IPSET_ERR_INVALID_NETMASK,
-	IPSET_ERR_INVALID_MARKMASK,
 	IPSET_ERR_INVALID_FAMILY,
 	IPSET_ERR_TIMEOUT,
 	IPSET_ERR_REFERENCED,
@@ -147,6 +146,7 @@ enum ipset_errno {
 	IPSET_ERR_IPADDR_IPV6,
 	IPSET_ERR_COUNTER,
 	IPSET_ERR_COMMENT,
+	IPSET_ERR_INVALID_MARKMASK,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
-- 
cgit v1.2.3


From 07cf8f5ae2657ac495b906c68ff3441ff8ba80ba Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Fri, 28 Feb 2014 22:14:57 -0500
Subject: netfilter: ipset: add forceadd kernel support for hash set types

Adds a new property for hash set types, where if a set is created
with the 'forceadd' option and the set becomes full the next addition
to the set may succeed and evict a random entry from the set.

To keep overhead low, eviction is done very simply. It checks to see
which bucket the new entry would be added to. If the bucket's pos value
is non-zero (meaning there's at least one entry in the bucket) it
replaces the first entry in the bucket. If pos is zero, then it continues
down the normal add process.

This property is useful if you have a set for 'ban' lists where it may
not matter if you release some entries from the set early.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h       |  3 +++
 include/uapi/linux/netfilter/ipset/ip_set.h  |  7 +++++--
 net/netfilter/ipset/ip_set_core.c            |  2 ++
 net/netfilter/ipset/ip_set_hash_gen.h        | 12 ++++++++++++
 net/netfilter/ipset/ip_set_hash_ip.c         |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipmark.c     |  2 +-
 net/netfilter/ipset/ip_set_hash_ipport.c     |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipportip.c   |  3 ++-
 net/netfilter/ipset/ip_set_hash_ipportnet.c  |  3 ++-
 net/netfilter/ipset/ip_set_hash_net.c        |  3 ++-
 net/netfilter/ipset/ip_set_hash_netiface.c   |  3 ++-
 net/netfilter/ipset/ip_set_hash_netnet.c     |  2 +-
 net/netfilter/ipset/ip_set_hash_netport.c    |  3 ++-
 net/netfilter/ipset/ip_set_hash_netportnet.c |  3 ++-
 14 files changed, 40 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index f476bcec25ea..96afc29184be 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -65,6 +65,7 @@ enum ip_set_extension {
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 #define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
@@ -255,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
 	if (SET_WITH_COMMENT(set))
 		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+	if (SET_WITH_FORCEADD(set))
+		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
 
 	if (!cadt_flags)
 		return 0;
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index a1ca24408206..78c2f2e79920 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -185,13 +185,16 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
 	IPSET_FLAG_BIT_WITH_COMMENT = 4,
 	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
+	IPSET_FLAG_BIT_WITH_FORCEADD = 5,
+	IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
 /* The flag bits which correspond to the non-extension create flags */
 enum ipset_create_flags {
-	IPSET_CREATE_FLAG_NONE = 0,
-	IPSET_CREATE_FLAG_MAX = 7,
+	IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
+	IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
+	IPSET_CREATE_FLAG_BIT_MAX = 7,
 };
 
 /* Commands with settype-specific attributes */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 636cb8df5354..117208321f16 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -368,6 +368,8 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
+		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index b1eed81e24c5..61c7fb052802 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -633,6 +633,18 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	bool flag_exist = flags & IPSET_FLAG_EXIST;
 	u32 key, multi = 0;
 
+	if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) {
+		rcu_read_lock_bh();
+		t = rcu_dereference_bh(h->table);
+		key = HKEY(value, h->initval, t->htable_bits);
+		n = hbucket(t,key);
+		if (n->pos) {
+			/* Choosing the first entry in the array to replace */
+			j = 0;
+			goto reuse_slot;
+		}
+		rcu_read_unlock_bh();
+	}
 	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
 		mtype_expire(set, h, NLEN(set->family), set->dsize);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e65fc2423d56..dd40607f878e 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -25,7 +25,8 @@
 
 #define IPSET_TYPE_REV_MIN	0
 /*				1	   Counters support */
-#define IPSET_TYPE_REV_MAX	2	/* Comments support */
+/*				2	   Comments support */
+#define IPSET_TYPE_REV_MAX	3	/* Forceadd support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 1bf8e8524218..4eff0a297254 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -25,7 +25,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	0
+#define IPSET_TYPE_REV_MAX	1	/* Forceadd support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 525a595dd1fe..7597b82a8b03 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -27,7 +27,8 @@
 #define IPSET_TYPE_REV_MIN	0
 /*				1    SCTP and UDPLITE support added */
 /*				2    Counters support added */
-#define IPSET_TYPE_REV_MAX	3 /* Comments support added */
+/*				3    Comments support added */
+#define IPSET_TYPE_REV_MAX	4 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index f5636631466e..672655ffd573 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -27,7 +27,8 @@
 #define IPSET_TYPE_REV_MIN	0
 /*				1    SCTP and UDPLITE support added */
 /*				2    Counters support added */
-#define IPSET_TYPE_REV_MAX	3 /* Comments support added */
+/*				3    Comments support added */
+#define IPSET_TYPE_REV_MAX	4 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 5d87fe8a41ff..7308d84f9277 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -29,7 +29,8 @@
 /*				2    Range as input support for IPv4 added */
 /*				3    nomatch flag support added */
 /*				4    Counters support added */
-#define IPSET_TYPE_REV_MAX	5 /* Comments support added */
+/*				5    Comments support added */
+#define IPSET_TYPE_REV_MAX	6 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 8295cf4f9fdc..4c7d495783a3 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -26,7 +26,8 @@
 /*				1    Range as input support for IPv4 added */
 /*				2    nomatch flag support added */
 /*				3    Counters support added */
-#define IPSET_TYPE_REV_MAX	4 /* Comments support added */
+/*				4    Comments support added */
+#define IPSET_TYPE_REV_MAX	5 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index b827a0f1f351..db2606805b35 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -27,7 +27,8 @@
 /*				1    nomatch flag support added */
 /*				2    /0 support added */
 /*				3    Counters support added */
-#define IPSET_TYPE_REV_MAX	4 /* Comments support added */
+/*				4    Comments support added */
+#define IPSET_TYPE_REV_MAX	5 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 4e7261df8961..3e99987e4bf2 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,7 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	0
+#define IPSET_TYPE_REV_MAX	1	/* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 7097fb0141bf..1c645fbd09c7 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -28,7 +28,8 @@
 /*				2    Range as input support for IPv4 added */
 /*				3    nomatch flag support added */
 /*				4    Counters support added */
-#define IPSET_TYPE_REV_MAX	5 /* Comments support added */
+/*				5    Comments support added */
+#define IPSET_TYPE_REV_MAX	6 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 703d1192a6a2..c0d2ba73f8b2 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -25,7 +25,8 @@
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define IPSET_TYPE_REV_MIN	0
-#define IPSET_TYPE_REV_MAX	0 /* Comments support added */
+/*				0    Comments support added */
+#define IPSET_TYPE_REV_MAX	1 /* Forceadd support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
-- 
cgit v1.2.3


From 870a2df4ca026817eb87bb2f9daaa60a93fd051a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 6 Mar 2014 18:24:29 +0100
Subject: xfrm: rename struct xfrm_filter

iproute2 already defines a structure with that name, let's use another one to
avoid any conflict.

CC: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        | 4 ++--
 include/uapi/linux/xfrm.h | 4 ++--
 net/key/af_key.c          | 2 +-
 net/xfrm/xfrm_state.c     | 4 ++--
 net/xfrm/xfrm_user.c      | 8 ++++----
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8b925288a8bc..ce3d96f752fd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -121,7 +121,7 @@ struct xfrm_state_walk {
 	u8			dying;
 	u8			proto;
 	u32			seq;
-	struct xfrm_filter	*filter;
+	struct xfrm_address_filter *filter;
 };
 
 /* Full description of state of transformer. */
@@ -1423,7 +1423,7 @@ static inline void xfrm_sysctl_fini(struct net *net)
 #endif
 
 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
-			  struct xfrm_filter *filter);
+			  struct xfrm_address_filter *filter);
 int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
 		    int (*func)(struct xfrm_state *, int, void*), void *);
 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 6550c679584f..25e5dd916ba4 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -299,7 +299,7 @@ enum xfrm_attr_type_t {
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
 	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	XFRMA_PROTO,		/* __u8 */
-	XFRMA_FILTER,		/* struct xfrm_filter */
+	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -476,7 +476,7 @@ struct xfrm_user_mapping {
 	__be16				new_sport;
 };
 
-struct xfrm_filter {
+struct xfrm_address_filter {
 	xfrm_address_t			saddr;
 	xfrm_address_t			daddr;
 	__u16				family;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a50d979b5926..12651b42aad8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1799,7 +1799,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
 static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
 {
 	u8 proto;
-	struct xfrm_filter *filter = NULL;
+	struct xfrm_address_filter *filter = NULL;
 	struct pfkey_sock *pfk = pfkey_sk(sk);
 
 	if (pfk->dump.dump != NULL)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5339c26bb0cf..cee850c76165 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1598,7 +1598,7 @@ unlock:
 EXPORT_SYMBOL(xfrm_alloc_spi);
 
 static bool __xfrm_state_filter_match(struct xfrm_state *x,
-				      struct xfrm_filter *filter)
+				      struct xfrm_address_filter *filter)
 {
 	if (filter) {
 		if ((filter->family == AF_INET ||
@@ -1657,7 +1657,7 @@ out:
 EXPORT_SYMBOL(xfrm_state_walk);
 
 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
-			  struct xfrm_filter *filter)
+			  struct xfrm_address_filter *filter)
 {
 	INIT_LIST_HEAD(&walk->all);
 	walk->proto = proto;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 023e5e7ea4c6..903725b8cc70 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -904,7 +904,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (!cb->args[0]) {
 		struct nlattr *attrs[XFRMA_MAX+1];
-		struct xfrm_filter *filter = NULL;
+		struct xfrm_address_filter *filter = NULL;
 		u8 proto = 0;
 		int err;
 
@@ -915,12 +915,12 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
 		if (err < 0)
 			return err;
 
-		if (attrs[XFRMA_FILTER]) {
+		if (attrs[XFRMA_ADDRESS_FILTER]) {
 			filter = kmalloc(sizeof(*filter), GFP_KERNEL);
 			if (filter == NULL)
 				return -ENOMEM;
 
-			memcpy(filter, nla_data(attrs[XFRMA_FILTER]),
+			memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]),
 			       sizeof(*filter));
 		}
 
@@ -2334,7 +2334,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
 	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
-	[XFRMA_FILTER]		= { .len = sizeof(struct xfrm_filter) },
+	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 };
 
 static const struct xfrm_link {
-- 
cgit v1.2.3


From 9859ccd2c8be63ce939522e63e265f2b0caa1109 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 28 Feb 2014 16:36:23 +0100
Subject: can: introduce the data bitrate configuration for CAN FD

As CAN FD offers a second bitrate for the data section of the CAN frame the
infrastructure for storing and configuring this second bitrate is introduced.
Improved the readability of the if-statement by inserting some newlines.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c            | 45 +++++++++++++++++++++++++++++++++++++++-
 include/linux/can/dev.h          |  6 ++++--
 include/uapi/linux/can/netlink.h |  2 ++
 3 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 8141290e4c18..8ebe112458c4 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -647,6 +647,10 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
 				= { .len = sizeof(struct can_bittiming_const) },
 	[IFLA_CAN_CLOCK]	= { .len = sizeof(struct can_clock) },
 	[IFLA_CAN_BERR_COUNTER]	= { .len = sizeof(struct can_berr_counter) },
+	[IFLA_CAN_DATA_BITTIMING]
+				= { .len = sizeof(struct can_bittiming) },
+	[IFLA_CAN_DATA_BITTIMING_CONST]
+				= { .len = sizeof(struct can_bittiming_const) },
 };
 
 static int can_changelink(struct net_device *dev,
@@ -707,6 +711,27 @@ static int can_changelink(struct net_device *dev,
 			return err;
 	}
 
+	if (data[IFLA_CAN_DATA_BITTIMING]) {
+		struct can_bittiming dbt;
+
+		/* Do not allow changing bittiming while running */
+		if (dev->flags & IFF_UP)
+			return -EBUSY;
+		memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]),
+		       sizeof(dbt));
+		err = can_get_bittiming(dev, &dbt, priv->data_bittiming_const);
+		if (err)
+			return err;
+		memcpy(&priv->data_bittiming, &dbt, sizeof(dbt));
+
+		if (priv->do_set_data_bittiming) {
+			/* Finally, set the bit-timing registers */
+			err = priv->do_set_data_bittiming(dev);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 }
 
@@ -725,6 +750,10 @@ static size_t can_get_size(const struct net_device *dev)
 	size += nla_total_size(sizeof(u32));			/* IFLA_CAN_RESTART_MS */
 	if (priv->do_get_berr_counter)				/* IFLA_CAN_BERR_COUNTER */
 		size += nla_total_size(sizeof(struct can_berr_counter));
+	if (priv->data_bittiming.bitrate)			/* IFLA_CAN_DATA_BITTIMING */
+		size += nla_total_size(sizeof(struct can_bittiming));
+	if (priv->data_bittiming_const)				/* IFLA_CAN_DATA_BITTIMING_CONST */
+		size += nla_total_size(sizeof(struct can_bittiming_const));
 
 	return size;
 }
@@ -738,20 +767,34 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 	if (priv->do_get_state)
 		priv->do_get_state(dev, &state);
+
 	if ((priv->bittiming.bitrate &&
 	     nla_put(skb, IFLA_CAN_BITTIMING,
 		     sizeof(priv->bittiming), &priv->bittiming)) ||
+
 	    (priv->bittiming_const &&
 	     nla_put(skb, IFLA_CAN_BITTIMING_CONST,
 		     sizeof(*priv->bittiming_const), priv->bittiming_const)) ||
+
 	    nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) ||
 	    nla_put_u32(skb, IFLA_CAN_STATE, state) ||
 	    nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) ||
 	    nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) ||
+
 	    (priv->do_get_berr_counter &&
 	     !priv->do_get_berr_counter(dev, &bec) &&
-	     nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)))
+	     nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) ||
+
+	    (priv->data_bittiming.bitrate &&
+	     nla_put(skb, IFLA_CAN_DATA_BITTIMING,
+		     sizeof(priv->data_bittiming), &priv->data_bittiming)) ||
+
+	    (priv->data_bittiming_const &&
+	     nla_put(skb, IFLA_CAN_DATA_BITTIMING_CONST,
+		     sizeof(*priv->data_bittiming_const),
+		     priv->data_bittiming_const)))
 		return -EMSGSIZE;
+
 	return 0;
 }
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index dc5f9026b67f..8adaee96f292 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -33,8 +33,9 @@ enum can_mode {
 struct can_priv {
 	struct can_device_stats can_stats;
 
-	struct can_bittiming bittiming;
-	const struct can_bittiming_const *bittiming_const;
+	struct can_bittiming bittiming, data_bittiming;
+	const struct can_bittiming_const *bittiming_const,
+		*data_bittiming_const;
 	struct can_clock clock;
 
 	enum can_state state;
@@ -45,6 +46,7 @@ struct can_priv {
 	struct timer_list restart_timer;
 
 	int (*do_set_bittiming)(struct net_device *dev);
+	int (*do_set_data_bittiming)(struct net_device *dev);
 	int (*do_set_mode)(struct net_device *dev, enum can_mode mode);
 	int (*do_get_state)(const struct net_device *dev,
 			    enum can_state *state);
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index df944ed206a8..b41933d6bdcd 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -122,6 +122,8 @@ enum {
 	IFLA_CAN_RESTART_MS,
 	IFLA_CAN_RESTART,
 	IFLA_CAN_BERR_COUNTER,
+	IFLA_CAN_DATA_BITTIMING,
+	IFLA_CAN_DATA_BITTIMING_CONST,
 	__IFLA_CAN_MAX
 };
 
-- 
cgit v1.2.3


From bc05a8944a344acdb81a65de055ca6febbf9657c Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 28 Feb 2014 16:36:24 +0100
Subject: can: allow to change the device mtu for CAN FD capable devices

The configuration for CAN FD depends on CAN_CTRLMODE_FD enabled in the driver
specific ctrlmode_supported capabilities.

The configuration can be done either with the 'fd { on | off }' option in the
'ip' tool from iproute2 or by setting the CAN netdevice MTU to CAN_MTU (16) or
to CANFD_MTU (72).

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c            | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/can/dev.h          |  1 +
 include/uapi/linux/can/netlink.h |  1 +
 3 files changed, 41 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 8ebe112458c4..4e20d82b799e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -594,6 +594,39 @@ void free_candev(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(free_candev);
 
+/*
+ * changing MTU and control mode for CAN/CANFD devices
+ */
+int can_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct can_priv *priv = netdev_priv(dev);
+
+	/* Do not allow changing the MTU while running */
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	/* allow change of MTU according to the CANFD ability of the device */
+	switch (new_mtu) {
+	case CAN_MTU:
+		priv->ctrlmode &= ~CAN_CTRLMODE_FD;
+		break;
+
+	case CANFD_MTU:
+		if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD))
+			return -EINVAL;
+
+		priv->ctrlmode |= CAN_CTRLMODE_FD;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(can_change_mtu);
+
 /*
  * Common open function when the device gets opened.
  *
@@ -693,6 +726,12 @@ static int can_changelink(struct net_device *dev,
 			return -EOPNOTSUPP;
 		priv->ctrlmode &= ~cm->mask;
 		priv->ctrlmode |= cm->flags;
+
+		/* CAN_CTRLMODE_FD can only be set when driver supports FD */
+		if (priv->ctrlmode & CAN_CTRLMODE_FD)
+			dev->mtu = CANFD_MTU;
+		else
+			dev->mtu = CAN_MTU;
 	}
 
 	if (data[IFLA_CAN_RESTART_MS]) {
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 8adaee96f292..3ce5e526525f 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -113,6 +113,7 @@ struct can_priv *safe_candev_priv(struct net_device *dev);
 
 int open_candev(struct net_device *dev);
 void close_candev(struct net_device *dev);
+int can_change_mtu(struct net_device *dev, int new_mtu);
 
 int register_candev(struct net_device *dev);
 void unregister_candev(struct net_device *dev);
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index b41933d6bdcd..7e2e1863db16 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -96,6 +96,7 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_3_SAMPLES		0x04	/* Triple sampling mode */
 #define CAN_CTRLMODE_ONE_SHOT		0x08	/* One-Shot mode */
 #define CAN_CTRLMODE_BERR_REPORTING	0x10	/* Bus-error reporting */
+#define CAN_CTRLMODE_FD			0x20	/* CAN FD mode */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From 147d2601d8fabf9451364f2d58098530a37eb3c9 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Mon, 27 Jan 2014 18:16:55 -0500
Subject: capabilities: add descriptions for AUDIT_CONTROL and AUDIT_WRITE

Fill in missing descriptions for AUDIT_CONTROL and AUDIT_WRITE definitions.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/uapi/linux/capability.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index ba478fa3012e..154dd6d3c8fe 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -308,8 +308,12 @@ struct vfs_cap_data {
 
 #define CAP_LEASE            28
 
+/* Allow writing the audit log via unicast netlink socket */
+
 #define CAP_AUDIT_WRITE      29
 
+/* Allow configuration of audit via unicast netlink socket */
+
 #define CAP_AUDIT_CONTROL    30
 
 #define CAP_SETFCAP	     31
-- 
cgit v1.2.3


From 935aa6b2e8a911e81baecec0537dd7e478dc8c91 Mon Sep 17 00:00:00 2001
From: Daniel Jeong <gshark.jeong@gmail.com>
Date: Mon, 3 Mar 2014 06:52:08 -0300
Subject: [media] v4l2-controls.h: Add additional Flash fault bits

Three Flash faults are added. V4L2_FLASH_FAULT_UNDER_VOLTAGE for the case
where the voltage drops below the min. limit. V4L2_FLASH_FAULT_INPUT_VOLTAGE
for the case where the input voltage falls and the chip adjusts the flash
current so that an under-voltage event does not occur.
V4L2_FLASH_FAULT_LED_OVER_TEMPERATURE for the case where the temperature
exceeds the maximum limit.

Signed-off-by: Daniel Jeong <gshark.jeong@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/v4l2-controls.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index e97101c1686f..6501c0b2860e 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -815,6 +815,9 @@ enum v4l2_flash_strobe_source {
 #define V4L2_FLASH_FAULT_SHORT_CIRCUIT		(1 << 3)
 #define V4L2_FLASH_FAULT_OVER_CURRENT		(1 << 4)
 #define V4L2_FLASH_FAULT_INDICATOR		(1 << 5)
+#define V4L2_FLASH_FAULT_UNDER_VOLTAGE		(1 << 6)
+#define V4L2_FLASH_FAULT_INPUT_VOLTAGE		(1 << 7)
+#define V4L2_FLASH_FAULT_LED_OVER_TEMPERATURE	(1 << 8)
 
 #define V4L2_CID_FLASH_CHARGE			(V4L2_CID_FLASH_CLASS_BASE + 11)
 #define V4L2_CID_FLASH_READY			(V4L2_CID_FLASH_CLASS_BASE + 12)
-- 
cgit v1.2.3


From 254a47770163f9322333660ebdabf99ba49873da Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 4 Mar 2014 07:46:47 -0300
Subject: [media] v4l2: allow v4l2_subdev_edid to be used with video nodes

Struct v4l2_subdev_edid and the VIDIOC_SUBDEV_G/S_EDID ioctls were
specific for subdevices, but for hardware with a simple video pipeline
you do not need/want to create subdevice nodes to just get/set the EDID.

Move the v4l2_subdev_edid struct to v4l2-common.h and rename as
v4l2_edid. Add the same ioctls to videodev2.h as well, thus allowing
this API to be used with both video nodes and v4l-subdev nodes.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/v4l2-common.h |  8 ++++++++
 include/uapi/linux/v4l2-subdev.h | 14 +++++---------
 include/uapi/linux/videodev2.h   |  2 ++
 3 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-common.h b/include/uapi/linux/v4l2-common.h
index 4f0667e010dd..270db8914c01 100644
--- a/include/uapi/linux/v4l2-common.h
+++ b/include/uapi/linux/v4l2-common.h
@@ -68,4 +68,12 @@
 #define V4L2_SUBDEV_SEL_FLAG_SIZE_LE	V4L2_SEL_FLAG_LE
 #define V4L2_SUBDEV_SEL_FLAG_KEEP_CONFIG V4L2_SEL_FLAG_KEEP_CONFIG
 
+struct v4l2_edid {
+	__u32 pad;
+	__u32 start_block;
+	__u32 blocks;
+	__u32 reserved[5];
+	__u8 __user *edid;
+};
+
 #endif /* __V4L2_COMMON__ */
diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index a33c4daadce3..87e05159f637 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -148,13 +148,8 @@ struct v4l2_subdev_selection {
 	__u32 reserved[8];
 };
 
-struct v4l2_subdev_edid {
-	__u32 pad;
-	__u32 start_block;
-	__u32 blocks;
-	__u32 reserved[5];
-	__u8 __user *edid;
-};
+/* Backwards compatibility define --- to be removed */
+#define v4l2_subdev_edid v4l2_edid
 
 #define VIDIOC_SUBDEV_G_FMT	_IOWR('V',  4, struct v4l2_subdev_format)
 #define VIDIOC_SUBDEV_S_FMT	_IOWR('V',  5, struct v4l2_subdev_format)
@@ -174,7 +169,8 @@ struct v4l2_subdev_edid {
 	_IOWR('V', 61, struct v4l2_subdev_selection)
 #define VIDIOC_SUBDEV_S_SELECTION \
 	_IOWR('V', 62, struct v4l2_subdev_selection)
-#define VIDIOC_SUBDEV_G_EDID	_IOWR('V', 40, struct v4l2_subdev_edid)
-#define VIDIOC_SUBDEV_S_EDID	_IOWR('V', 41, struct v4l2_subdev_edid)
+/* These two G/S_EDID ioctls are identical to the ioctls in videodev2.h */
+#define VIDIOC_SUBDEV_G_EDID	_IOWR('V', 40, struct v4l2_edid)
+#define VIDIOC_SUBDEV_S_EDID	_IOWR('V', 41, struct v4l2_edid)
 
 #endif
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 17acba8c7f9f..339738a6e96b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1913,6 +1913,8 @@ struct v4l2_create_buffers {
 #define VIDIOC_QUERYMENU	_IOWR('V', 37, struct v4l2_querymenu)
 #define VIDIOC_G_INPUT		 _IOR('V', 38, int)
 #define VIDIOC_S_INPUT		_IOWR('V', 39, int)
+#define VIDIOC_G_EDID		_IOWR('V', 40, struct v4l2_edid)
+#define VIDIOC_S_EDID		_IOWR('V', 41, struct v4l2_edid)
 #define VIDIOC_G_OUTPUT		 _IOR('V', 46, int)
 #define VIDIOC_S_OUTPUT		_IOWR('V', 47, int)
 #define VIDIOC_ENUMOUTPUT	_IOWR('V', 48, struct v4l2_output)
-- 
cgit v1.2.3


From 409332b65d3ed8cfa7a8030f1e9d52f372219642 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Thu, 13 Mar 2014 19:07:42 +1100
Subject: fs: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate

Introduce new FALLOC_FL_ZERO_RANGE flag for fallocate. This has the same
functionality as xfs ioctl XFS_IOC_ZERO_RANGE.

It can be used to convert a range of file to zeros preferably without
issuing data IO. Blocks should be preallocated for the regions that span
holes in the file, and the entire range is preferably converted to
unwritten extents - even though file system may choose to zero out the
extent or do whatever which will result in reading zeros from the range
while the range remains allocated for the file.

This can be also used to preallocate blocks past EOF in the same way as
with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
size to remain the same.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/open.c                   |  7 ++++++-
 include/uapi/linux/falloc.h | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/fs/open.c b/fs/open.c
index 4a923a547d10..c4465b2f8441 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -232,7 +232,12 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
 	/* Return error if mode is not supported */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-		     FALLOC_FL_COLLAPSE_RANGE))
+		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+		return -EOPNOTSUPP;
+
+	/* Punch hole and zero range are mutually exclusive */
+	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
+	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
 		return -EOPNOTSUPP;
 
 	/* Punch hole must have keep size set */
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 5ff562ddac0b..d1197ae3723c 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -27,4 +27,18 @@
  */
 #define FALLOC_FL_COLLAPSE_RANGE	0x08
 
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of file to zeros preferably
+ * without issuing data IO. Blocks should be preallocated for the regions that
+ * span holes in the file, and the entire range is preferably converted to
+ * unwritten extents - even though file system may choose to zero out the
+ * extent or do whatever which will result in reading zeros from the range
+ * while the range remains allocated for the file.
+ *
+ * This can be also used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE		0x10
+
 #endif /* _UAPI_FALLOC_H_ */
-- 
cgit v1.2.3


From 100943c54e0947a07d2c0185368fc2fd848f7f28 Mon Sep 17 00:00:00 2001
From: "Gabriel L. Somlo" <gsomlo@gmail.com>
Date: Thu, 27 Feb 2014 23:06:17 -0500
Subject: kvm: x86: ignore ioapic polarity

Both QEMU and KVM have already accumulated a significant number of
optimizations based on the hard-coded assumption that ioapic polarity
will always use the ActiveHigh convention, where the logical and
physical states of level-triggered irq lines always match (i.e.,
active(asserted) == high == 1, inactive == low == 0). QEMU guests
are expected to follow directions given via ACPI and configure the
ioapic with polarity 0 (ActiveHigh). However, even when misbehaving
guests (e.g. OS X <= 10.9) set the ioapic polarity to 1 (ActiveLow),
QEMU will still use the ActiveHigh signaling convention when
interfacing with KVM.

This patch modifies KVM to completely ignore ioapic polarity as set by
the guest OS, enabling misbehaving guests to work alongside those which
comply with the ActiveHigh polarity specified by QEMU's ACPI tables.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Gabriel L. Somlo <somlo@cmu.edu>
[Move documentation to KVM_IRQ_LINE, add ia64. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 16 +++++++++++++++-
 arch/ia64/kvm/kvm-ia64.c          |  1 +
 arch/x86/kvm/x86.c                |  1 +
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/ioapic.c                 |  1 -
 5 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6cd63a9010fb..4714f282a43e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -612,6 +612,20 @@ On some architectures it is required that an interrupt controller model has
 been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
 interrupts require the level to be set to 1 and then back to 0.
 
+On real hardware, interrupt pins can be active-low or active-high.  This
+does not matter for the level field of struct kvm_irq_level: 1 always
+means active (asserted), 0 means inactive (deasserted).
+
+x86 allows the operating system to program the interrupt polarity
+(active-low/active-high) for level-triggered interrupts, and KVM used
+to consider the polarity.  However, due to bitrot in the handling of
+active-low interrupts, the above convention is now valid on x86 too.
+This is signaled by KVM_CAP_IOAPIC_POLARITY_IGNORED.  Userspace
+should not present interrupts to the guest as active-low unless this
+capability is present (or unless it is not using the in-kernel irqchip,
+of course).
+
+
 ARM/arm64 can signal an interrupt either at the CPU level, or at the
 in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
@@ -628,7 +642,7 @@ The irq_type field has the following values:
 
 (The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
 
-In both cases, level is used to raise/lower the line.
+In both cases, level is used to assert/deassert the line.
 
 struct kvm_irq_level {
 	union {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 53f44bee9ebb..6a4309bb821a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -199,6 +199,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d906391a0a3f..a37da6b0165a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2657,6 +2657,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
 	case KVM_CAP_HYPERV_TIME:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7d76401d2bb5..a7518be31d53 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -740,6 +740,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_SPAPR_MULTITCE 94
 #define KVM_CAP_EXT_EMUL_CPUID 95
 #define KVM_CAP_HYPERV_TIME 96
+#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7dc..1539d3757a04 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -328,7 +328,6 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
 	entry = ioapic->redirtbl[irq];
-	irq_level ^= entry.fields.polarity;
 	if (!irq_level) {
 		ioapic->irr &= ~mask;
 		ret = 1;
-- 
cgit v1.2.3


From 3ce569fd7c55ed99c04c4ebc5e49304f29a139bb Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 31 Jan 2014 23:36:13 -0300
Subject: [media] v4l: add RF tuner channel bandwidth control

Modern silicon RF tuners have one or more adjustable filters on the
signal path, in order to filter noise from the desired radio channel.

Add a channel bandwidth control to tell the driver the width of the
radio channel we want to receive. Filters can then be adjusted by
the driver or hardware, using RF frequency and channel bandwidth
as a base of filter calculations.

In automatic mode (normal mode), bandwidth is calculated from the sampling
rate or tuning info obtained from userspace. The new control makes it
possible to select manual mode and gives the user more control over the
filters.

Cc: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 4 ++++
 include/uapi/linux/v4l2-controls.h   | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 5c3e8ca9b1d1..48550b0eca2e 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -867,6 +867,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_RF_TUNER_MIXER_GAIN:	return "Mixer Gain";
 	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:	return "IF Gain, Auto";
 	case V4L2_CID_RF_TUNER_IF_GAIN:		return "IF Gain";
+	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
+	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
 	default:
 		return NULL;
 	}
@@ -919,6 +921,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:
 	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:
 	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -1084,6 +1087,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RF_TUNER_LNA_GAIN:
 	case V4L2_CID_RF_TUNER_MIXER_GAIN:
 	case V4L2_CID_RF_TUNER_IF_GAIN:
+	case V4L2_CID_RF_TUNER_BANDWIDTH:
 		*flags |= V4L2_CTRL_FLAG_SLIDER;
 		break;
 	case V4L2_CID_PAN_RELATIVE:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 6501c0b2860e..60a626ca47c8 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -910,5 +910,7 @@ enum v4l2_deemphasis {
 #define V4L2_CID_RF_TUNER_MIXER_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 4)
 #define V4L2_CID_RF_TUNER_IF_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 5)
 #define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 6)
+#define V4L2_CID_RF_TUNER_BANDWIDTH_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 7)
+#define V4L2_CID_RF_TUNER_BANDWIDTH		(V4L2_CID_RF_TUNER_CLASS_BASE + 8)
 
 #endif
-- 
cgit v1.2.3


From 835b87c7adecef13bbc2a32c8e8437201144e9c4 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Tue, 4 Feb 2014 22:13:44 -0300
Subject: [media] v4l: reorganize RF tuner control ID numbers

It appears that controls are ordered by ID number when enumerating.
That could lead to an illogical UI, as controls are usually enumerated and
drawn by the application at runtime.

Change the order of controls by reorganizing the assigned IDs now, while we
still can. It is not reasonably possible after the API is released. Also, leave
some spare space between IDs for possible future extensions.

Cc: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/v4l2-controls.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 60a626ca47c8..405279f3c326 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -904,13 +904,13 @@ enum v4l2_deemphasis {
 #define V4L2_CID_RF_TUNER_CLASS_BASE		(V4L2_CTRL_CLASS_RF_TUNER | 0x900)
 #define V4L2_CID_RF_TUNER_CLASS			(V4L2_CTRL_CLASS_RF_TUNER | 1)
 
-#define V4L2_CID_RF_TUNER_LNA_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 1)
-#define V4L2_CID_RF_TUNER_LNA_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 2)
-#define V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 3)
-#define V4L2_CID_RF_TUNER_MIXER_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 4)
-#define V4L2_CID_RF_TUNER_IF_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 5)
-#define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 6)
-#define V4L2_CID_RF_TUNER_BANDWIDTH_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 7)
-#define V4L2_CID_RF_TUNER_BANDWIDTH		(V4L2_CID_RF_TUNER_CLASS_BASE + 8)
+#define V4L2_CID_RF_TUNER_BANDWIDTH_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 11)
+#define V4L2_CID_RF_TUNER_BANDWIDTH		(V4L2_CID_RF_TUNER_CLASS_BASE + 12)
+#define V4L2_CID_RF_TUNER_LNA_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 41)
+#define V4L2_CID_RF_TUNER_LNA_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 42)
+#define V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO	(V4L2_CID_RF_TUNER_CLASS_BASE + 51)
+#define V4L2_CID_RF_TUNER_MIXER_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 52)
+#define V4L2_CID_RF_TUNER_IF_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 61)
+#define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 62)
 
 #endif
-- 
cgit v1.2.3


From 00419a6ab590b6cf584f704abbb56b7a5c388672 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Tue, 4 Feb 2014 23:55:25 -0300
Subject: [media] v4l: uapi: add SDR formats CU8 and CU16LE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

V4L2_SDR_FMT_CU8 — Complex unsigned 8-bit IQ sample
V4L2_SDR_FMT_CU16LE — Complex unsigned 16-bit little endian IQ sample

Cc: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 339738a6e96b..35f4a060fafe 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -436,6 +436,10 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SE401      v4l2_fourcc('S', '4', '0', '1') /* se401 janggu compressed rgb */
 #define V4L2_PIX_FMT_S5C_UYVY_JPG v4l2_fourcc('S', '5', 'C', 'I') /* S5C73M3 interleaved UYVY/JPEG */
 
+/* SDR formats - used only for Software Defined Radio devices */
+#define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
+#define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
+
 /*
  *	F O R M A T   E N U M E R A T I O N
  */
-- 
cgit v1.2.3


From 9aa4357e9b10b92acb85e30834f8eb4aa7b94554 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 7 Feb 2014 02:46:16 -0300
Subject: [media] v4l: add control for RF tuner PLL lock flag

Add a volatile boolean control to indicate whether the tuner frequency
synthesizer is locked to the requested frequency. That means the tuner is able
to receive the given frequency. The control is named "PLL lock", since frequency
synthesizers are based on a phase-locked loop. Maybe a more general name
would still be wise?

Cc: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 5 +++++
 include/uapi/linux/v4l2-controls.h   | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 48550b0eca2e..55c683254102 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -869,6 +869,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_RF_TUNER_IF_GAIN:		return "IF Gain";
 	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
 	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
+	case V4L2_CID_RF_TUNER_PLL_LOCK:	return "PLL Lock";
 	default:
 		return NULL;
 	}
@@ -922,6 +923,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:
 	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
 	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:
+	case V4L2_CID_RF_TUNER_PLL_LOCK:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -1106,6 +1108,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DV_RX_POWER_PRESENT:
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
 		break;
+	case V4L2_CID_RF_TUNER_PLL_LOCK:
+		*flags |= V4L2_CTRL_FLAG_VOLATILE;
+		break;
 	}
 }
 EXPORT_SYMBOL(v4l2_ctrl_fill);
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 405279f3c326..2ac5597f3ee1 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -912,5 +912,6 @@ enum v4l2_deemphasis {
 #define V4L2_CID_RF_TUNER_MIXER_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 52)
 #define V4L2_CID_RF_TUNER_IF_GAIN_AUTO		(V4L2_CID_RF_TUNER_CLASS_BASE + 61)
 #define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 62)
+#define V4L2_CID_RF_TUNER_PLL_LOCK			(V4L2_CID_RF_TUNER_CLASS_BASE + 91)
 
 #endif
-- 
cgit v1.2.3


From 87185c958de9cd4acd8392f00d6161f4e11807ff Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Mon, 10 Mar 2014 10:43:24 -0300
Subject: [media] v4l: rename v4l2_format_sdr to v4l2_sdr_format

Rename v4l2_format_sdr to v4l2_sdr_format in order to keep it in
line with other formats.

Reported-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/dev-sdr.xml | 2 +-
 drivers/media/v4l2-core/v4l2-ioctl.c        | 2 +-
 include/uapi/linux/videodev2.h              | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/dev-sdr.xml b/Documentation/DocBook/media/v4l/dev-sdr.xml
index ac9f1af35267..524b9c402421 100644
--- a/Documentation/DocBook/media/v4l/dev-sdr.xml
+++ b/Documentation/DocBook/media/v4l/dev-sdr.xml
@@ -78,7 +78,7 @@ of the data format.
     </para>
 
     <table pgwide="1" frame="none" id="v4l2-format-sdr">
-      <title>struct <structname>v4l2_format_sdr</structname></title>
+      <title>struct <structname>v4l2_sdr_format</structname></title>
       <tgroup cols="3">
         &cs-str;
         <tbody valign="top">
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 6536e15c45e5..d9113cc71c77 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -246,7 +246,7 @@ static void v4l_print_format(const void *arg, bool write_only)
 	const struct v4l2_vbi_format *vbi;
 	const struct v4l2_sliced_vbi_format *sliced;
 	const struct v4l2_window *win;
-	const struct v4l2_format_sdr *sdr;
+	const struct v4l2_sdr_format *sdr;
 	unsigned i;
 
 	pr_cont("type=%s", prt_names(p->type, v4l2_type_names));
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 35f4a060fafe..e35ad6ca1e8a 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1714,10 +1714,10 @@ struct v4l2_pix_format_mplane {
 } __attribute__ ((packed));
 
 /**
- * struct v4l2_format_sdr - SDR format definition
+ * struct v4l2_sdr_format - SDR format definition
  * @pixelformat:	little endian four character code (fourcc)
  */
-struct v4l2_format_sdr {
+struct v4l2_sdr_format {
 	__u32				pixelformat;
 	__u8				reserved[28];
 } __attribute__ ((packed));
@@ -1740,7 +1740,7 @@ struct v4l2_format {
 		struct v4l2_window		win;     /* V4L2_BUF_TYPE_VIDEO_OVERLAY */
 		struct v4l2_vbi_format		vbi;     /* V4L2_BUF_TYPE_VBI_CAPTURE */
 		struct v4l2_sliced_vbi_format	sliced;  /* V4L2_BUF_TYPE_SLICED_VBI_CAPTURE */
-		struct v4l2_format_sdr		sdr;     /* V4L2_BUF_TYPE_SDR_CAPTURE */
+		struct v4l2_sdr_format		sdr;     /* V4L2_BUF_TYPE_SDR_CAPTURE */
 		__u8	raw_data[200];                   /* user-defined */
 	} fmt;
 };
-- 
cgit v1.2.3


From 61ccbb684421d374fdcd7cf5d6b024b06f03ce4e Mon Sep 17 00:00:00 2001
From: Veaceslav Falico <vfalico@redhat.com>
Date: Thu, 13 Mar 2014 12:41:57 +0100
Subject: ether: add loopback type ETH_P_LOOPBACK

Per IEEE 802.3*, the correct packet type for loopback is 0x9000. There's
already one ETH_P_LOOP 0x0060, which has been there for ages, however it's
plainly wrong as anything that small is considered a length field.

We can't remove it for legacy reasons, so add a new type which corresponds to
the correct id.

http://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml

CC: "David S. Miller" <davem@davemloft.net>
CC: Stefan Richter <stefanr@s5r6.in-berlin.de>
CC: Simon Wunderlich <sw@simonwunderlich.de>
CC: Neil Jerram <Neil.Jerram@metaswitch.com>
CC: Simon Horman <horms@verge.net.au>
CC: Arvid Brodin <Arvid.Brodin@xdin.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 750ba67e0dc3..0f8210b8e0bc 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -90,6 +90,7 @@
 #define ETH_P_TDLS	0x890D          /* TDLS */
 #define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
 #define ETH_P_80221	0x8917		/* IEEE 802.21 Media Independent Handover Protocol */
+#define ETH_P_LOOPBACK	0x9000		/* Ethernet loopback packet, per IEEE 802.3 */
 #define ETH_P_QINQ1	0x9100		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
-- 
cgit v1.2.3


From 2509671dcf6314635592f8eb84ef4793fce11a07 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Thu, 20 Mar 2014 13:52:28 +0100
Subject: isdn: capi: fix "CAPI_VERSION" comment

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/uapi/linux/capi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/capi.h b/include/uapi/linux/capi.h
index 65100d6cb89b..7b145fd7afb6 100644
--- a/include/uapi/linux/capi.h
+++ b/include/uapi/linux/capi.h
@@ -102,7 +102,7 @@ typedef struct capi_manufacturer_cmd {
 
 /*
  * member contr is input for
- * CAPI_GET_MANUFACTURER, CAPI_VERSION, CAPI_GET_SERIAL
+ * CAPI_GET_MANUFACTURER, CAPI_GET_VERSION, CAPI_GET_SERIAL
  * and CAPI_GET_PROFILE
  */
 typedef union capi_ioctl_struct {
-- 
cgit v1.2.3


From 3f1c82502c299da08b7b7f08b435212e51166ed9 Mon Sep 17 00:00:00 2001
From: William Roberts <bill.c.roberts@gmail.com>
Date: Tue, 11 Feb 2014 10:12:01 -0800
Subject: audit: Audit proc/<pid>/cmdline aka proctitle

During an audit event, cache and print the process's proctitle
value (proc/<pid>/cmdline). This is useful in situations
where processes are started via fork'd virtual machines where the
comm field is incorrect. Oftentimes, setting the comm field still
is insufficient as the comm width is not very wide and most
virtual machine "package names" do not fit. Also, during execution,
many threads have their comm field set as well. By tying it back to
the global cmdline value for the process, audit records will be more
complete in systems with these properties. An example of where this
is useful and applicable is in the realm of Android. With Android,
there is no fork/exec for VM instances. The bare, preloaded Dalvik
VM listens for a fork and specialize request. When this request comes
in, the VM forks, and then loads the specific application (specializing).
This was done to take advantage of COW and to not require a load of
basic packages by the VM on every app spawn. When this spawn occurs,
the package name is set via setproctitle() and shows up in procfs.
Many of these package names are longer than 16 bytes, the historical
width of task->comm. Having the cmdline in the audit records will
couple the application back to the record directly. Also, on my
Debian development box, some audit records were more useful than
what was printed under comm.

The cached proctitle is tied to the life-cycle of the audit_context
structure and is built on demand.

Proctitle is controllable by userspace, and thus should not be trusted.
It is meant as an aid to assist in debugging. The proctitle event is
emitted during syscall audits, and can be filtered with auditctl.

Example:
type=AVC msg=audit(1391217013.924:386): avc:  denied  { getattr } for  pid=1971 comm="mkdir" name="/" dev="selinuxfs" ino=1 scontext=system_u:system_r:consolekit_t:s0-s0:c0.c255 tcontext=system_u:object_r:security_t:s0 tclass=filesystem
type=SYSCALL msg=audit(1391217013.924:386): arch=c000003e syscall=137 success=yes exit=0 a0=7f019dfc8bd7 a1=7fffa6aed2c0 a2=fffffffffff4bd25 a3=7fffa6aed050 items=0 ppid=1967 pid=1971 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="mkdir" exe="/bin/mkdir" subj=system_u:system_r:consolekit_t:s0-s0:c0.c255 key=(null)
type=UNKNOWN[1327] msg=audit(1391217013.924:386):  proctitle=6D6B646972002D70002F7661722F72756E2F636F6E736F6C65

Acked-by: Steve Grubb <sgrubb@redhat.com> (wrt record formatting)

Signed-off-by: William Roberts <wroberts@tresys.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/uapi/linux/audit.h |  1 +
 kernel/audit.h             |  6 +++++
 kernel/auditsc.c           | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 2d48fe1274ca..4315ee99b967 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -109,6 +109,7 @@
 #define AUDIT_NETFILTER_PKT	1324	/* Packets traversing netfilter chains */
 #define AUDIT_NETFILTER_CFG	1325	/* Netfilter chain modifications */
 #define AUDIT_SECCOMP		1326	/* Secure Computing event */
+#define AUDIT_PROCTITLE		1327	/* Proctitle emit event */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/kernel/audit.h b/kernel/audit.h
index 57cc64d67718..38c967d28de5 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -106,6 +106,11 @@ struct audit_names {
 	bool			should_free;
 };
 
+struct audit_proctitle {
+	int	len;	/* length of the cmdline field. */
+	char	*value;	/* the cmdline field */
+};
+
 /* The per-task audit context. */
 struct audit_context {
 	int		    dummy;	/* must be the first element */
@@ -202,6 +207,7 @@ struct audit_context {
 		} execve;
 	};
 	int fds[2];
+	struct audit_proctitle proctitle;
 
 #if AUDIT_DEBUG
 	int		    put_count;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6874c1fd453d..043d1ef9362f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,7 @@
 #include <linux/capability.h>
 #include <linux/fs_struct.h>
 #include <linux/compat.h>
+#include <linux/ctype.h>
 
 #include "audit.h"
 
@@ -81,6 +82,9 @@
 /* no execve audit message should be longer than this (userspace limits) */
 #define MAX_EXECVE_AUDIT_LEN 7500
 
+/* max length to print of cmdline/proctitle value during audit */
+#define MAX_PROCTITLE_AUDIT_LEN 128
+
 /* number of audit rules */
 int audit_n_rules;
 
@@ -844,6 +848,13 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	return context;
 }
 
+static inline void audit_proctitle_free(struct audit_context *context)
+{
+	kfree(context->proctitle.value);
+	context->proctitle.value = NULL;
+	context->proctitle.len = 0;
+}
+
 static inline void audit_free_names(struct audit_context *context)
 {
 	struct audit_names *n, *next;
@@ -956,6 +967,7 @@ static inline void audit_free_context(struct audit_context *context)
 	audit_free_aux(context);
 	kfree(context->filterkey);
 	kfree(context->sockaddr);
+	audit_proctitle_free(context);
 	kfree(context);
 }
 
@@ -1272,6 +1284,59 @@ static void show_special(struct audit_context *context, int *call_panic)
 	audit_log_end(ab);
 }
 
+static inline int audit_proctitle_rtrim(char *proctitle, int len)
+{
+	char *end = proctitle + len - 1;
+	while (end > proctitle && !isprint(*end))
+		end--;
+
+	/* catch the case where proctitle is only 1 non-print character */
+	len = end - proctitle + 1;
+	len -= isprint(proctitle[len-1]) == 0;
+	return len;
+}
+
+static void audit_log_proctitle(struct task_struct *tsk,
+			 struct audit_context *context)
+{
+	int res;
+	char *buf;
+	char *msg = "(null)";
+	int len = strlen(msg);
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE);
+	if (!ab)
+		return;	/* audit_panic or being filtered */
+
+	audit_log_format(ab, "proctitle=");
+
+	/* Not  cached */
+	if (!context->proctitle.value) {
+		buf = kmalloc(MAX_PROCTITLE_AUDIT_LEN, GFP_KERNEL);
+		if (!buf)
+			goto out;
+		/* Historically called this from procfs naming */
+		res = get_cmdline(tsk, buf, MAX_PROCTITLE_AUDIT_LEN);
+		if (res == 0) {
+			kfree(buf);
+			goto out;
+		}
+		res = audit_proctitle_rtrim(buf, res);
+		if (res == 0) {
+			kfree(buf);
+			goto out;
+		}
+		context->proctitle.value = buf;
+		context->proctitle.len = res;
+	}
+	msg = context->proctitle.value;
+	len = context->proctitle.len;
+out:
+	audit_log_n_untrustedstring(ab, msg, len);
+	audit_log_end(ab);
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i, call_panic = 0;
@@ -1389,6 +1454,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_name(context, n, NULL, i++, &call_panic);
 	}
 
+	audit_log_proctitle(tsk, context);
+
 	/* Send end of event record to help user space know we are finished */
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
 	if (ab)
-- 
cgit v1.2.3


From 4b58841149dcaa500ceba1d5378ae70622fe4899 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Sat, 15 Mar 2014 14:48:00 +0900
Subject: audit: Add generic compat syscall support

lib/audit.c provides a generic function for auditing system calls.
This patch extends it for compat syscall support on bi-architectures
(32/64-bit) by adding lib/compat_audit.c.
Supporting this feature requires an architecture to:
 * add asm/unistd32.h for compat system call names
 * select CONFIG_AUDIT_ARCH_COMPAT_GENERIC

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h      |  8 ++++++++
 include/uapi/linux/audit.h |  6 ++++++
 lib/Kconfig                |  9 +++++++++
 lib/Makefile               |  1 +
 lib/audit.c                | 15 +++++++++++++-
 lib/compat_audit.c         | 50 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 lib/compat_audit.c

(limited to 'include/uapi/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index ec1464df4c60..4b2983e25ce0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -79,6 +79,14 @@ extern int is_audit_feature_set(int which);
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
+/* only for compat system calls */
+extern unsigned compat_write_class[];
+extern unsigned compat_read_class[];
+extern unsigned compat_dir_class[];
+extern unsigned compat_chattr_class[];
+extern unsigned compat_signal_class[];
+
+extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall);
 
 /* audit_names->type values */
 #define	AUDIT_TYPE_UNKNOWN	0	/* we don't know yet */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4315ee99b967..9af01d77dc44 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -362,6 +362,12 @@ enum {
 #define AUDIT_ARCH_SPARC64	(EM_SPARCV9|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
+#ifdef CONFIG_COMPAT
+#define audit_is_compat(arch)	(!((arch) & __AUDIT_ARCH_64BIT))
+#else
+#define audit_is_compat(arch)	false
+#endif
+
 #define AUDIT_PERM_EXEC		1
 #define AUDIT_PERM_WRITE	2
 #define AUDIT_PERM_READ		4
diff --git a/lib/Kconfig b/lib/Kconfig
index 991c98bc4a3f..1e80cb3c77a9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -182,6 +182,15 @@ config AUDIT_GENERIC
 	depends on AUDIT && !AUDIT_ARCH
 	default y
 
+config AUDIT_ARCH_COMPAT_GENERIC
+	bool
+	default n
+
+config AUDIT_COMPAT_GENERIC
+	bool
+	depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT
+	default y
+
 config RANDOM32_SELFTEST
 	bool "PRNG perform self test on init"
 	default n
diff --git a/lib/Makefile b/lib/Makefile
index a459c31e8c6b..972552b39cf5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
+obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
diff --git a/lib/audit.c b/lib/audit.c
index 76bbed4a20e5..1d726a22565b 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -30,11 +30,17 @@ static unsigned signal_class[] = {
 
 int audit_classify_arch(int arch)
 {
-	return 0;
+	if (audit_is_compat(arch))
+		return 1;
+	else
+		return 0;
 }
 
 int audit_classify_syscall(int abi, unsigned syscall)
 {
+	if (audit_is_compat(abi))
+		return audit_classify_compat_syscall(abi, syscall);
+
 	switch(syscall) {
 #ifdef __NR_open
 	case __NR_open:
@@ -57,6 +63,13 @@ int audit_classify_syscall(int abi, unsigned syscall)
 
 static int __init audit_classes_init(void)
 {
+#ifdef CONFIG_AUDIT_COMPAT_GENERIC
+	audit_register_class(AUDIT_CLASS_WRITE_32, compat_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, compat_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, compat_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, compat_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, compat_signal_class);
+#endif
 	audit_register_class(AUDIT_CLASS_WRITE, write_class);
 	audit_register_class(AUDIT_CLASS_READ, read_class);
 	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/lib/compat_audit.c b/lib/compat_audit.c
new file mode 100644
index 000000000000..873f75b640ab
--- /dev/null
+++ b/lib/compat_audit.c
@@ -0,0 +1,50 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <asm/unistd32.h>
+
+unsigned compat_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned compat_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned compat_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned compat_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned compat_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_compat_syscall(int abi, unsigned syscall)
+{
+	switch (syscall) {
+#ifdef __NR_open
+	case __NR_open:
+		return 2;
+#endif
+#ifdef __NR_openat
+	case __NR_openat:
+		return 3;
+#endif
+#ifdef __NR_socketcall
+	case __NR_socketcall:
+		return 4;
+#endif
+	case __NR_execve:
+		return 5;
+	default:
+		return 1;
+	}
+}
-- 
cgit v1.2.3


From bfe9b3f8c5229e5de4fd18e941866bc410d16334 Mon Sep 17 00:00:00 2001
From: Ben Chan <benchan@chromium.org>
Date: Wed, 19 Mar 2014 14:00:05 -0700
Subject: USB: cdc: add MBIM extended functional descriptor structure

This patch adds the MBIM extended functional descriptor structure
defined in "Universal Serial Bus Communications Class Subclass
Specification for Mobile Broadband Interface Model, Revision 1.0,
Errata-1" published by USB-IF.

Signed-off-by: Ben Chan <benchan@chromium.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/usb/cdc.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h
index f35aa0a338c7..b6a9cdd6e096 100644
--- a/include/uapi/linux/usb/cdc.h
+++ b/include/uapi/linux/usb/cdc.h
@@ -56,6 +56,7 @@
 #define USB_CDC_OBEX_TYPE		0x15
 #define USB_CDC_NCM_TYPE		0x1a
 #define USB_CDC_MBIM_TYPE		0x1b
+#define USB_CDC_MBIM_EXTENDED_TYPE	0x1c
 
 /* "Header Functional Descriptor" from CDC spec  5.2.3.1 */
 struct usb_cdc_header_desc {
@@ -205,6 +206,17 @@ struct usb_cdc_mbim_desc {
 	__u8    bmNetworkCapabilities;
 } __attribute__ ((packed));
 
+/* "MBIM Extended Functional Descriptor" from CDC MBIM spec 1.0 errata-1 */
+struct usb_cdc_mbim_extended_desc {
+	__u8	bLength;
+	__u8	bDescriptorType;
+	__u8	bDescriptorSubType;
+
+	__le16	bcdMBIMExtendedVersion;
+	__u8	bMaxOutstandingCommandMessages;
+	__le16	wMTU;
+} __attribute__ ((packed));
+
 /*-------------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3


From d938dc55225a7212e7f31c5a8571da304cc3de16 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Wed, 23 Oct 2013 18:26:34 +0200
Subject: KVM: Add per-vm capability enablement.

Allow KVM_ENABLE_CAP to act on a vm as well as on a vcpu. This makes more
sense when the caller wants to enable a vm-related capability.

s390 will be the first user; wire it up.

Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |  6 ++++--
 arch/s390/kvm/kvm-s390.c          | 24 ++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  5 +++++
 3 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4714f282a43e..faf6fe9772c7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -932,9 +932,9 @@ documentation when it pops into existence).
 
 4.37 KVM_ENABLE_CAP
 
-Capability: KVM_CAP_ENABLE_CAP
+Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
 Architectures: ppc, s390
-Type: vcpu ioctl
+Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
 Parameters: struct kvm_enable_cap (in)
 Returns: 0 on success; -1 on error
 
@@ -965,6 +965,8 @@ function properly, this is the place to put them.
        __u8  pad[64];
 };
 
+The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
+for vm-wide capabilities.
 
 4.38 KVM_GET_MP_STATE
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7337c577e949..9f1e99f12d4f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -159,6 +159,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_S390_CSS_SUPPORT:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_ENABLE_CAP_VM:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -187,6 +188,21 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	return 0;
 }
 
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -204,6 +220,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_s390_inject_vm(kvm, &s390int);
 		break;
 	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7518be31d53..46ea1b470c76 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -741,6 +741,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_EXT_EMUL_CPUID 95
 #define KVM_CAP_HYPERV_TIME 96
 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
+#define KVM_CAP_ENABLE_CAP_VM 98
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1076,6 +1077,10 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_DEBUGREGS */
 #define KVM_GET_DEBUGREGS         _IOR(KVMIO,  0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS         _IOW(KVMIO,  0xa2, struct kvm_debugregs)
+/*
+ * vcpu version available with KVM_CAP_ENABLE_CAP
+ * vm version available with KVM_CAP_ENABLE_CAP_VM
+ */
 #define KVM_ENABLE_CAP            _IOW(KVMIO,  0xa3, struct kvm_enable_cap)
 /* Available with KVM_CAP_XSAVE */
 #define KVM_GET_XSAVE		  _IOR(KVMIO,  0xa4, struct kvm_xsave)
-- 
cgit v1.2.3


From 84223598778ba08041f4297fda485df83414d57e Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Mon, 15 Jul 2013 13:36:01 +0200
Subject: KVM: s390: irq routing for adapter interrupts.

Introduce a new interrupt class for s390 adapter interrupts and enable
irqfds for s390.

This depends on a new s390-specific vm capability, KVM_CAP_S390_IRQCHIP,
that needs to be enabled by userspace.

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |  21 +++++--
 arch/s390/include/asm/kvm_host.h  |   9 +++
 arch/s390/kvm/Kconfig             |   2 +
 arch/s390/kvm/Makefile            |   2 +-
 arch/s390/kvm/interrupt.c         | 121 ++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/irq.h               |  22 +++++++
 arch/s390/kvm/kvm-s390.c          |  17 ++++++
 include/linux/kvm_host.h          |   9 +++
 include/uapi/linux/kvm.h          |  11 ++++
 9 files changed, 209 insertions(+), 5 deletions(-)
 create mode 100644 arch/s390/kvm/irq.h

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index faf6fe9772c7..2cb1640a90ad 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -586,8 +586,8 @@ struct kvm_fpu {
 
 4.24 KVM_CREATE_IRQCHIP
 
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM, arm64
+Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+Architectures: x86, ia64, ARM, arm64, s390
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -596,7 +596,10 @@ Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
 only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
-created.
+created. On s390, a dummy irq routing table is created.
+
+Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
+before KVM_CREATE_IRQCHIP can be used.
 
 
 4.25 KVM_IRQ_LINE
@@ -1336,7 +1339,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
 4.52 KVM_SET_GSI_ROUTING
 
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64
+Architectures: x86 ia64 s390
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
@@ -1359,6 +1362,7 @@ struct kvm_irq_routing_entry {
 	union {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
+		struct kvm_irq_routing_s390_adapter adapter;
 		__u32 pad[8];
 	} u;
 };
@@ -1366,6 +1370,7 @@ struct kvm_irq_routing_entry {
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
 
 No flags are specified so far, the corresponding field must be set to zero.
 
@@ -1381,6 +1386,14 @@ struct kvm_irq_routing_msi {
 	__u32 pad;
 };
 
+struct kvm_irq_routing_s390_adapter {
+	__u64 ind_addr;
+	__u64 summary_addr;
+	__u64 ind_offset;
+	__u32 summary_offset;
+	__u32 adapter_id;
+};
+
 
 4.53 KVM_ASSIGN_SET_MSIX_NR
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 0d5235262707..dd3933754d23 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -24,6 +24,14 @@
 #define KVM_MAX_VCPUS 64
 #define KVM_USER_MEM_SLOTS 32
 
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
 struct sca_entry {
 	atomic_t scn;
 	__u32	reserved;
@@ -274,6 +282,7 @@ struct kvm_arch{
 	struct kvm_device *flic;
 	struct gmap *gmap;
 	int css_support;
+	int use_irqchip;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 };
 
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index c8bacbcd2e5b..10d529ac9821 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -25,6 +25,8 @@ config KVM
 	select HAVE_KVM_EVENTFD
 	select KVM_ASYNC_PF
 	select KVM_ASYNC_PF_SYNC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index a47d2c355f68..d3adb37e93a4 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
 # as published by the Free Software Foundation.
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 7ecef5a18e25..2e2814eceb85 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -13,6 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <asm/asm-offsets.h>
@@ -1284,3 +1285,123 @@ struct kvm_device_ops kvm_flic_ops = {
 	.create = flic_create,
 	.destroy = flic_destroy,
 };
+
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+	unsigned long bit;
+
+	bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+	return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
+}
+
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+					  u64 addr)
+{
+	struct s390_map_info *map;
+
+	if (!adapter)
+		return NULL;
+
+	list_for_each_entry(map, &adapter->maps, list) {
+		if (map->guest_addr == addr)
+			return map;
+	}
+	return NULL;
+}
+
+static int adapter_indicators_set(struct kvm *kvm,
+				  struct s390_io_adapter *adapter,
+				  struct kvm_s390_adapter_int *adapter_int)
+{
+	unsigned long bit;
+	int summary_set, idx;
+	struct s390_map_info *info;
+	void *map;
+
+	info = get_map_info(adapter, adapter_int->ind_addr);
+	if (!info)
+		return -1;
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+	set_bit(bit, map);
+	idx = srcu_read_lock(&kvm->srcu);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	info = get_map_info(adapter, adapter_int->summary_addr);
+	if (!info) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -1;
+	}
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+			  adapter->swap);
+	summary_set = test_and_set_bit(bit, map);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	srcu_read_unlock(&kvm->srcu, idx);
+	return summary_set ? 0 : 1;
+}
+
+/*
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	int ret;
+	struct s390_io_adapter *adapter;
+
+	/* We're only interested in the 0->1 transition. */
+	if (!level)
+		return 0;
+	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+	if (!adapter)
+		return -1;
+	down_read(&adapter->maps_lock);
+	ret = adapter_indicators_set(kvm, adapter, &e->adapter);
+	up_read(&adapter->maps_lock);
+	if ((ret > 0) && !adapter->masked) {
+		struct kvm_s390_interrupt s390int = {
+			.type = KVM_S390_INT_IO(1, 0, 0, 0),
+			.parm = 0,
+			.parm64 = (adapter->isc << 27) | 0x80000000,
+		};
+		ret = kvm_s390_inject_vm(kvm, &s390int);
+		if (ret == 0)
+			ret = 1;
+	}
+	return ret;
+}
+
+int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	int ret;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_S390_ADAPTER:
+		e->set = set_adapter_int;
+		e->adapter.summary_addr = ue->u.adapter.summary_addr;
+		e->adapter.ind_addr = ue->u.adapter.ind_addr;
+		e->adapter.summary_offset = ue->u.adapter.summary_offset;
+		e->adapter.ind_offset = ue->u.adapter.ind_offset;
+		e->adapter.adapter_id = ue->u.adapter.adapter_id;
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+		int irq_source_id, int level, bool line_status)
+{
+	return -EINVAL;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 000000000000..d98e4159643d
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2e6fbb0b4f68..ce5b659ec531 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -196,6 +196,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		return -EINVAL;
 
 	switch (cap->cap) {
+	case KVM_CAP_S390_IRQCHIP:
+		kvm->arch.use_irqchip = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -228,6 +232,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+	case KVM_CREATE_IRQCHIP: {
+		struct kvm_irq_routing_entry routing;
+
+		r = -EINVAL;
+		if (kvm->arch.use_irqchip) {
+			/* Set up dummy routing. */
+			memset(&routing, 0, sizeof(routing));
+			kvm_set_irq_routing(kvm, &routing, 0, 0);
+			r = 0;
+		}
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -284,6 +300,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	}
 
 	kvm->arch.css_support = 0;
+	kvm->arch.use_irqchip = 0;
 
 	return 0;
 out_nogmap:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9816b68b085f..da7510b4c6ad 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -297,6 +297,14 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 }
 
+struct kvm_s390_adapter_int {
+	u64 ind_addr;
+	u64 summary_addr;
+	u64 ind_offset;
+	u32 summary_offset;
+	u32 adapter_id;
+};
+
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
@@ -309,6 +317,7 @@ struct kvm_kernel_irq_routing_entry {
 			unsigned pin;
 		} irqchip;
 		struct msi_msg msi;
+		struct kvm_s390_adapter_int adapter;
 	};
 	struct hlist_node link;
 };
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 46ea1b470c76..a8f4ee5d2e82 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -742,6 +742,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_HYPERV_TIME 96
 #define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
 #define KVM_CAP_ENABLE_CAP_VM 98
+#define KVM_CAP_S390_IRQCHIP 99
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -757,9 +758,18 @@ struct kvm_irq_routing_msi {
 	__u32 pad;
 };
 
+struct kvm_irq_routing_s390_adapter {
+	__u64 ind_addr;
+	__u64 summary_addr;
+	__u64 ind_offset;
+	__u32 summary_offset;
+	__u32 adapter_id;
+};
+
 /* gsi routing entry types */
 #define KVM_IRQ_ROUTING_IRQCHIP 1
 #define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
 
 struct kvm_irq_routing_entry {
 	__u32 gsi;
@@ -769,6 +779,7 @@ struct kvm_irq_routing_entry {
 	union {
 		struct kvm_irq_routing_irqchip irqchip;
 		struct kvm_irq_routing_msi msi;
+		struct kvm_irq_routing_s390_adapter adapter;
 		__u32 pad[8];
 	} u;
 };
-- 
cgit v1.2.3


From 6092315dfdec5185881605d15a0e200d6e90eb66 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Thu, 20 Mar 2014 22:21:52 +0100
Subject: ptp: introduce programmable pins.

This patch adds a pair of new ioctls to the PTP Hardware Clock device
interface. Using the ioctls, user space programs can query each pin to
find out its current function and also reprogram a different function
if desired.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c        | 128 ++++++++++++++++++++++++++++++++++++++-
 drivers/ptp/ptp_clock.c          |  23 +++++++
 drivers/ptp/ptp_private.h        |   5 ++
 include/linux/ptp_clock_kernel.h |  33 ++++++++++
 include/uapi/linux/ptp_clock.h   |  39 +++++++++++-
 5 files changed, 226 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 34a0c607318e..419056d7887e 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -25,6 +25,96 @@
 
 #include "ptp_private.h"
 
+static int ptp_disable_pinfunc(struct ptp_clock_info *ops,
+			       enum ptp_pin_function func, unsigned int chan)
+{
+	struct ptp_clock_request rq;
+	int err = 0;
+
+	memset(&rq, 0, sizeof(rq));
+
+	switch (func) {
+	case PTP_PF_NONE:
+		break;
+	case PTP_PF_EXTTS:
+		rq.type = PTP_CLK_REQ_EXTTS;
+		rq.extts.index = chan;
+		err = ops->enable(ops, &rq, 0);
+		break;
+	case PTP_PF_PEROUT:
+		rq.type = PTP_CLK_REQ_PEROUT;
+		rq.perout.index = chan;
+		err = ops->enable(ops, &rq, 0);
+		break;
+	case PTP_PF_PHYSYNC:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return err;
+}
+
+int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
+		    enum ptp_pin_function func, unsigned int chan)
+{
+	struct ptp_clock_info *info = ptp->info;
+	struct ptp_pin_desc *pin1 = NULL, *pin2 = &info->pin_config[pin];
+	unsigned int i;
+
+	/* Check to see if any other pin previously had this function. */
+	for (i = 0; i < info->n_pins; i++) {
+		if (info->pin_config[i].func == func &&
+		    info->pin_config[i].chan == chan) {
+			pin1 = &info->pin_config[i];
+			break;
+		}
+	}
+	if (pin1 && i == pin)
+		return 0;
+
+	/* Check the desired function and channel. */
+	switch (func) {
+	case PTP_PF_NONE:
+		break;
+	case PTP_PF_EXTTS:
+		if (chan >= info->n_ext_ts)
+			return -EINVAL;
+		break;
+	case PTP_PF_PEROUT:
+		if (chan >= info->n_per_out)
+			return -EINVAL;
+		break;
+	case PTP_PF_PHYSYNC:
+		pr_err("sorry, cannot reassign the calibration pin\n");
+		return -EINVAL;
+	default:
+		return -EINVAL;
+	}
+
+	if (pin2->func == PTP_PF_PHYSYNC) {
+		pr_err("sorry, cannot reprogram the calibration pin\n");
+		return -EINVAL;
+	}
+
+	if (info->verify(info, pin, func, chan)) {
+		pr_err("driver cannot use function %u on pin %u\n", func, chan);
+		return -EOPNOTSUPP;
+	}
+
+	/* Disable whatever function was previously assigned. */
+	if (pin1) {
+		ptp_disable_pinfunc(info, func, chan);
+		pin1->func = PTP_PF_NONE;
+		pin1->chan = 0;
+	}
+	ptp_disable_pinfunc(info, pin2->func, pin2->chan);
+	pin2->func = func;
+	pin2->chan = chan;
+
+	return 0;
+}
+
 int ptp_open(struct posix_clock *pc, fmode_t fmode)
 {
 	return 0;
@@ -35,12 +125,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 	struct ptp_clock_caps caps;
 	struct ptp_clock_request req;
 	struct ptp_sys_offset *sysoff = NULL;
+	struct ptp_pin_desc pd;
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
 	struct ptp_clock_info *ops = ptp->info;
 	struct ptp_clock_time *pct;
 	struct timespec ts;
 	int enable, err = 0;
-	unsigned int i;
+	unsigned int i, pin_index;
 
 	switch (cmd) {
 
@@ -51,6 +142,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		caps.n_ext_ts = ptp->info->n_ext_ts;
 		caps.n_per_out = ptp->info->n_per_out;
 		caps.pps = ptp->info->pps;
+		caps.n_pins = ptp->info->n_pins;
 		if (copy_to_user((void __user *)arg, &caps, sizeof(caps)))
 			err = -EFAULT;
 		break;
@@ -126,6 +218,40 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EFAULT;
 		break;
 
+	case PTP_PIN_GETFUNC:
+		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
+			err = -EFAULT;
+			break;
+		}
+		pin_index = pd.index;
+		if (pin_index >= ops->n_pins) {
+			err = -EINVAL;
+			break;
+		}
+		if (mutex_lock_interruptible(&ptp->pincfg_mux))
+			return -ERESTARTSYS;
+		pd = ops->pin_config[pin_index];
+		mutex_unlock(&ptp->pincfg_mux);
+		if (!err && copy_to_user((void __user *)arg, &pd, sizeof(pd)))
+			err = -EFAULT;
+		break;
+
+	case PTP_PIN_SETFUNC:
+		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
+			err = -EFAULT;
+			break;
+		}
+		pin_index = pd.index;
+		if (pin_index >= ops->n_pins) {
+			err = -EINVAL;
+			break;
+		}
+		if (mutex_lock_interruptible(&ptp->pincfg_mux))
+			return -ERESTARTSYS;
+		err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan);
+		mutex_unlock(&ptp->pincfg_mux);
+		break;
+
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index a8319b266643..e25d2bc898e5 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -169,6 +169,7 @@ static void delete_ptp_clock(struct posix_clock *pc)
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
 
 	mutex_destroy(&ptp->tsevq_mux);
+	mutex_destroy(&ptp->pincfg_mux);
 	ida_simple_remove(&ptp_clocks_map, ptp->index);
 	kfree(ptp);
 }
@@ -203,6 +204,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	ptp->index = index;
 	spin_lock_init(&ptp->tsevq.lock);
 	mutex_init(&ptp->tsevq_mux);
+	mutex_init(&ptp->pincfg_mux);
 	init_waitqueue_head(&ptp->tsev_wq);
 
 	/* Create a new device in our class. */
@@ -249,6 +251,7 @@ no_sysfs:
 	device_destroy(ptp_class, ptp->devid);
 no_device:
 	mutex_destroy(&ptp->tsevq_mux);
+	mutex_destroy(&ptp->pincfg_mux);
 no_slot:
 	kfree(ptp);
 no_memory:
@@ -305,6 +308,26 @@ int ptp_clock_index(struct ptp_clock *ptp)
 }
 EXPORT_SYMBOL(ptp_clock_index);
 
+int ptp_find_pin(struct ptp_clock *ptp,
+		 enum ptp_pin_function func, unsigned int chan)
+{
+	struct ptp_pin_desc *pin = NULL;
+	int i;
+
+	mutex_lock(&ptp->pincfg_mux);
+	for (i = 0; i < ptp->info->n_pins; i++) {
+		if (ptp->info->pin_config[i].func == func &&
+		    ptp->info->pin_config[i].chan == chan) {
+			pin = &ptp->info->pin_config[i];
+			break;
+		}
+	}
+	mutex_unlock(&ptp->pincfg_mux);
+
+	return pin ? i : -1;
+}
+EXPORT_SYMBOL(ptp_find_pin);
+
 /* module operations */
 
 static void __exit ptp_exit(void)
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index df03f2e30ad9..b114a84c63c7 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -48,6 +48,7 @@ struct ptp_clock {
 	long dialed_frequency; /* remembers the frequency adjustment */
 	struct timestamp_event_queue tsevq; /* simple fifo for time stamps */
 	struct mutex tsevq_mux; /* one process at a time reading the fifo */
+	struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
 	wait_queue_head_t tsev_wq;
 	int defunct; /* tells readers to go away when clock is being removed */
 };
@@ -69,6 +70,10 @@ static inline int queue_cnt(struct timestamp_event_queue *q)
  * see ptp_chardev.c
  */
 
+/* caller must hold pincfg_mux */
+int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
+		    enum ptp_pin_function func, unsigned int chan);
+
 long ptp_ioctl(struct posix_clock *pc,
 	       unsigned int cmd, unsigned long arg);
 
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 38a993508327..0d8ff3fb84ba 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -49,7 +49,11 @@ struct ptp_clock_request {
  * @n_alarm:   The number of programmable alarms.
  * @n_ext_ts:  The number of external time stamp channels.
  * @n_per_out: The number of programmable periodic signals.
+ * @n_pins:    The number of programmable pins.
  * @pps:       Indicates whether the clock supports a PPS callback.
+ * @pin_config: Array of length 'n_pins'. If the number of
+ *              programmable pins is nonzero, then drivers must
+ *              allocate and initialize this array.
  *
  * clock operations
  *
@@ -70,6 +74,18 @@ struct ptp_clock_request {
  *            parameter request: Desired resource to enable or disable.
  *            parameter on: Caller passes one to enable or zero to disable.
  *
+ * @verify:   Confirm that a pin can perform a given function. The PTP
+ *            Hardware Clock subsystem maintains the 'pin_config'
+ *            array on behalf of the drivers, but the PHC subsystem
+ *            assumes that every pin can perform every function. This
+ *            hook gives drivers a way of telling the core about
+ *            limitations on specific pins. This function must return
+ *            zero if the function can be assigned to this pin, and
+ *            nonzero otherwise.
+ *            parameter pin: index of the pin in question.
+ *            parameter func: the desired function to use.
+ *            parameter chan: the function channel index to use.
+ *
  * Drivers should embed their ptp_clock_info within a private
  * structure, obtaining a reference to it using container_of().
  *
@@ -83,13 +99,17 @@ struct ptp_clock_info {
 	int n_alarm;
 	int n_ext_ts;
 	int n_per_out;
+	int n_pins;
 	int pps;
+	struct ptp_pin_desc *pin_config;
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime)(struct ptp_clock_info *ptp, struct timespec *ts);
 	int (*settime)(struct ptp_clock_info *ptp, const struct timespec *ts);
 	int (*enable)(struct ptp_clock_info *ptp,
 		      struct ptp_clock_request *request, int on);
+	int (*verify)(struct ptp_clock_info *ptp, unsigned int pin,
+		      enum ptp_pin_function func, unsigned int chan);
 };
 
 struct ptp_clock;
@@ -156,4 +176,17 @@ extern void ptp_clock_event(struct ptp_clock *ptp,
 
 extern int ptp_clock_index(struct ptp_clock *ptp);
 
+/**
+ * ptp_find_pin() - obtain the pin index of a given auxiliary function
+ *
+ * @ptp:    The clock obtained from ptp_clock_register().
+ * @func:   One of the ptp_pin_function enumerated values.
+ * @chan:   The particular functional channel to find.
+ * Return:  Pin index in the range of zero to ptp_clock_caps.n_pins - 1,
+ *          or -1 if the auxiliary function cannot be found.
+ */
+
+int ptp_find_pin(struct ptp_clock *ptp,
+		 enum ptp_pin_function func, unsigned int chan);
+
 #endif
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index b65c834f83e9..f0b7bfe5da92 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -50,7 +50,8 @@ struct ptp_clock_caps {
 	int n_ext_ts;  /* Number of external time stamp channels. */
 	int n_per_out; /* Number of programmable periodic signals. */
 	int pps;       /* Whether the clock supports a PPS callback. */
-	int rsv[15];   /* Reserved for future use. */
+	int n_pins;    /* Number of input/output pins. */
+	int rsv[14];   /* Reserved for future use. */
 };
 
 struct ptp_extts_request {
@@ -80,6 +81,40 @@ struct ptp_sys_offset {
 	struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1];
 };
 
+enum ptp_pin_function {
+	PTP_PF_NONE,
+	PTP_PF_EXTTS,
+	PTP_PF_PEROUT,
+	PTP_PF_PHYSYNC,
+};
+
+struct ptp_pin_desc {
+	/*
+	 * Hardware specific human readable pin name. This field is
+	 * set by the kernel during the PTP_PIN_GETFUNC ioctl and is
+	 * ignored for the PTP_PIN_SETFUNC ioctl.
+	 */
+	char name[64];
+	/*
+	 * Pin index in the range of zero to ptp_clock_caps.n_pins - 1.
+	 */
+	unsigned int index;
+	/*
+	 * Which of the PTP_PF_xxx functions to use on this pin.
+	 */
+	unsigned int func;
+	/*
+	 * The specific channel to use for this function.
+	 * This corresponds to the 'index' field of the
+	 * PTP_EXTTS_REQUEST and PTP_PEROUT_REQUEST ioctls.
+	 */
+	unsigned int chan;
+	/*
+	 * Reserved for future use.
+	 */
+	unsigned int rsv[5];
+};
+
 #define PTP_CLK_MAGIC '='
 
 #define PTP_CLOCK_GETCAPS  _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps)
@@ -87,6 +122,8 @@ struct ptp_sys_offset {
 #define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request)
 #define PTP_ENABLE_PPS     _IOW(PTP_CLK_MAGIC, 4, int)
 #define PTP_SYS_OFFSET     _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset)
+#define PTP_PIN_GETFUNC    _IOWR(PTP_CLK_MAGIC, 6, struct ptp_pin_desc)
+#define PTP_PIN_SETFUNC    _IOW(PTP_CLK_MAGIC, 7, struct ptp_pin_desc)
 
 struct ptp_extts_event {
 	struct ptp_clock_time t; /* Time event occured. */
-- 
cgit v1.2.3


From 356750e35e86485c464704c0a32c1d8dc77590d7 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 24 Mar 2014 12:13:48 -0400
Subject: audit: define audit_is_compat in kernel internal header

We were exposing a function based on kernel config options to userspace.
This is wrong.  Move it to the audit internal header.

Suggested-by: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h      | 6 ++++++
 include/uapi/linux/audit.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4b2983e25ce0..611a59a56f1a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -102,6 +102,12 @@ struct filename;
 
 extern void audit_log_session_info(struct audit_buffer *ab);
 
+#ifdef CONFIG_COMPAT
+#define audit_is_compat(arch)  (!((arch) & __AUDIT_ARCH_64BIT))
+#else
+#define audit_is_compat(arch)  false
+#endif
+
 #ifdef CONFIG_AUDITSYSCALL
 /* These are defined in auditsc.c */
 				/* Public API */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 9af01d77dc44..4315ee99b967 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -362,12 +362,6 @@ enum {
 #define AUDIT_ARCH_SPARC64	(EM_SPARCV9|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
-#ifdef CONFIG_COMPAT
-#define audit_is_compat(arch)	(!((arch) & __AUDIT_ARCH_64BIT))
-#else
-#define audit_is_compat(arch)	false
-#endif
-
 #define AUDIT_PERM_EXEC		1
 #define AUDIT_PERM_WRITE	2
 #define AUDIT_PERM_READ		4
-- 
cgit v1.2.3


From aee786acfc0a12bcd37a1c60f3198fb25cf7181a Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 27 Feb 2014 06:04:57 -0300
Subject: [media] videodev2.h: add parenthesis around macro arguments

bt->width should be (bt)->width, and likewise for the other fields, so
that the macros also expand correctly when the argument is an expression
such as &timings.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Cc: stable@vger.kernel.org # For 3.12 or upper
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e35ad6ca1e8a..ea468ee8fe21 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1080,14 +1080,14 @@ struct v4l2_bt_timings {
 
 /* A few useful defines to calculate the total blanking and frame sizes */
 #define V4L2_DV_BT_BLANKING_WIDTH(bt) \
-	(bt->hfrontporch + bt->hsync + bt->hbackporch)
+	((bt)->hfrontporch + (bt)->hsync + (bt)->hbackporch)
 #define V4L2_DV_BT_FRAME_WIDTH(bt) \
-	(bt->width + V4L2_DV_BT_BLANKING_WIDTH(bt))
+	((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt))
 #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \
-	(bt->vfrontporch + bt->vsync + bt->vbackporch + \
-	 bt->il_vfrontporch + bt->il_vsync + bt->il_vbackporch)
+	((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \
+	 (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch)
 #define V4L2_DV_BT_FRAME_HEIGHT(bt) \
-	(bt->height + V4L2_DV_BT_BLANKING_HEIGHT(bt))
+	((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt))
 
 /** struct v4l2_dv_timings - DV timings
  * @type:	the type of the timings
-- 
cgit v1.2.3


From 339e022396d58f4b4f9b4200ea5309768934bb33 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 28 Mar 2014 14:25:58 -0700
Subject: net: export NET_ADDR_* values to user-space API

NET_ADDR_* values are exported through the
/sys/class/net/<iface>/addr_assign_type sysfs attribute, and as such
constitute a user-space ABI. Move the NET_ADDR_* definitions from
include/linux/netdevice.h to include/uapi/linux/netdevice.h.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      | 7 -------
 include/uapi/linux/netdevice.h | 6 ++++++
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 29b579fb5196..34cae3ee74f1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -63,13 +63,6 @@ struct wireless_dev;
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
 
-/* hardware address assignment types */
-#define NET_ADDR_PERM		0	/* address is permanent (default) */
-#define NET_ADDR_RANDOM		1	/* address is generated randomly */
-#define NET_ADDR_STOLEN		2	/* address is stolen from other device */
-#define NET_ADDR_SET		3	/* address is set using
-					 * dev_set_mac_address() */
-
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
 #define NET_RX_DROP		1	/* packet dropped */
diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h
index 6b9500bc2d56..fdfbd1c17065 100644
--- a/include/uapi/linux/netdevice.h
+++ b/include/uapi/linux/netdevice.h
@@ -49,5 +49,11 @@ enum {
         IF_PORT_100BASEFX
 };
 
+/* hardware address assignment types */
+#define NET_ADDR_PERM		0	/* address is permanent (default) */
+#define NET_ADDR_RANDOM		1	/* address is generated randomly */
+#define NET_ADDR_STOLEN		2	/* address is stolen from other device */
+#define NET_ADDR_SET		3	/* address is set using
+					 * dev_set_mac_address() */
 
 #endif /* _UAPI_LINUX_NETDEVICE_H */
-- 
cgit v1.2.3


From 2d3b479df41a10e2f41f9259fcba775bd34de6e4 Mon Sep 17 00:00:00 2001
From: david decotigny <decot@googlers.com>
Date: Sat, 29 Mar 2014 09:48:35 -0700
Subject: net-sysfs: expose number of carrier on/off changes

This allows monitoring carrier on/off transitions and detecting link
flapping issues:
 - new /sys/class/net/X/carrier_changes
 - new rtnetlink IFLA_CARRIER_CHANGES (getlink)

Tested:
  - grep . /sys/class/net/*/carrier_changes
    + ip link set dev X down/up
    + plug/unplug cable
  - updated iproute2: prints IFLA_CARRIER_CHANGES
  - iproute2 20121211-2 (debian): unchanged behavior

Signed-off-by: David Decotigny <decot@googlers.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h |  1 +
 net/core/net-sysfs.c         | 11 +++++++++++
 net/core/rtnetlink.c         |  6 +++++-
 net/sched/sch_generic.c      |  2 ++
 5 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 34cae3ee74f1..45537ed7a5b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1308,6 +1308,9 @@ struct net_device {
 	atomic_long_t		rx_dropped;
 	atomic_long_t		tx_dropped;
 
+	/* Stats to monitor carrier on<->off transitions */
+	atomic_t		carrier_changes;
+
 #ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 16410b6e7819..9a7f7ace6649 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -144,6 +144,7 @@ enum {
 	IFLA_NUM_RX_QUEUES,
 	IFLA_CARRIER,
 	IFLA_PHYS_PORT_ID,
+	IFLA_CARRIER_CHANGES,
 	__IFLA_MAX
 };
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index daed9a64c6f6..462396278484 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -253,6 +253,16 @@ static ssize_t operstate_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(operstate);
 
+static ssize_t carrier_changes_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	return sprintf(buf, fmt_dec,
+		       atomic_read(&netdev->carrier_changes));
+}
+static DEVICE_ATTR_RO(carrier_changes);
+
 /* read-write attributes */
 
 static int change_mtu(struct net_device *net, unsigned long new_mtu)
@@ -386,6 +396,7 @@ static struct attribute *net_class_attrs[] = {
 	&dev_attr_duplex.attr,
 	&dev_attr_dormant.attr,
 	&dev_attr_operstate.attr,
+	&dev_attr_carrier_changes.attr,
 	&dev_attr_ifalias.attr,
 	&dev_attr_carrier.attr,
 	&dev_attr_mtu.attr,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e7c6006bc3ea..d4ff41739b0f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -822,6 +822,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
+	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
 	       + nla_total_size(ext_filter_mask
 			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -970,7 +971,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    (dev->qdisc &&
 	     nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
 	    (dev->ifalias &&
-	     nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)))
+	     nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+	    nla_put_u32(skb, IFLA_CARRIER_CHANGES,
+			atomic_read(&dev->carrier_changes)))
 		goto nla_put_failure;
 
 	if (1) {
@@ -1147,6 +1150,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_NUM_TX_QUEUES]	= { .type = NLA_U32 },
 	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
 	[IFLA_PHYS_PORT_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+	[IFLA_CARRIER_CHANGES]	= { .type = NLA_U32 },  /* ignored */
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e82e43b69c33..e1543b03e39d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -310,6 +310,7 @@ void netif_carrier_on(struct net_device *dev)
 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		if (dev->reg_state == NETREG_UNINITIALIZED)
 			return;
+		atomic_inc(&dev->carrier_changes);
 		linkwatch_fire_event(dev);
 		if (netif_running(dev))
 			__netdev_watchdog_up(dev);
@@ -328,6 +329,7 @@ void netif_carrier_off(struct net_device *dev)
 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		if (dev->reg_state == NETREG_UNINITIALIZED)
 			return;
+		atomic_inc(&dev->carrier_changes);
 		linkwatch_fire_event(dev);
 	}
 }
-- 
cgit v1.2.3


From 0a7c3937a1f23f8cb5fc77ae01661e9968a51d0c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: vfs: add RENAME_NOREPLACE flag

If this flag is specified and the target of the rename exists then the
rename syscall fails with EEXIST.

The VFS does the existence checking, so it is trivial to enable for most
local filesystems.  This patch only enables it in ext4.

For network filesystems the VFS check is not enough as there may be a race
between a remote create and the rename, so these filesystems need to handle
this flag in their ->rename() implementations to ensure atomicity.

Andy writes about why this is useful:

"The trivial answer: to eliminate the race condition from 'mv -i'.

Another answer: there's a common pattern to atomically create a file
with contents: open a temporary file, write to it, optionally fsync
it, close it, then link(2) it to the final name, then unlink the
temporary file.

The reason to use link(2) is because it won't silently clobber the destination.

This is annoying:
 - It requires an extra system call that shouldn't be necessary.
 - It doesn't work on (IMO sensible) filesystems that don't support
hard links (e.g. vfat).
 - It's not atomic -- there's an intermediate state where both files exist.
 - It's ugly.

The new rename flag will make this totally sensible.

To be fair, on new enough kernels, you can also use O_TMPFILE and
linkat to achieve the same thing even more cleanly."

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/ext4/namei.c         | 11 +++++++++++
 fs/namei.c              | 21 +++++++++++++--------
 include/uapi/linux/fs.h |  2 ++
 3 files changed, 26 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index d050e043e884..5f19171b3e1f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3204,6 +3204,16 @@ end_rename:
 	return retval;
 }
 
+static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
+			struct inode *new_dir, struct dentry *new_dentry,
+			unsigned int flags)
+{
+	if (flags & ~RENAME_NOREPLACE)
+		return -EINVAL;
+
+	return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
 /*
  * directories can handle most operations...
  */
@@ -3218,6 +3228,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.mknod		= ext4_mknod,
 	.tmpfile	= ext4_tmpfile,
 	.rename		= ext4_rename,
+	.rename2	= ext4_rename2,
 	.setattr	= ext4_setattr,
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
diff --git a/fs/namei.c b/fs/namei.c
index ab4e48c4a80a..0e9d186b7f77 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4142,7 +4142,7 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags)
+	if (flags & ~RENAME_NOREPLACE)
 		return -EINVAL;
 
 retry:
@@ -4168,6 +4168,8 @@ retry:
 		goto exit2;
 
 	new_dir = newnd.path.dentry;
+	if (flags & RENAME_NOREPLACE)
+		error = -EEXIST;
 	if (newnd.last_type != LAST_NORM)
 		goto exit2;
 
@@ -4190,22 +4192,25 @@ retry_deleg:
 	error = -ENOENT;
 	if (d_is_negative(old_dentry))
 		goto exit4;
+	new_dentry = lookup_hash(&newnd);
+	error = PTR_ERR(new_dentry);
+	if (IS_ERR(new_dentry))
+		goto exit4;
+	error = -EEXIST;
+	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
+		goto exit5;
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
 	if (!d_is_dir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
-			goto exit4;
+			goto exit5;
 		if (newnd.last.name[newnd.last.len])
-			goto exit4;
+			goto exit5;
 	}
 	/* source should not be ancestor of target */
 	error = -EINVAL;
 	if (old_dentry == trap)
-		goto exit4;
-	new_dentry = lookup_hash(&newnd);
-	error = PTR_ERR(new_dentry);
-	if (IS_ERR(new_dentry))
-		goto exit4;
+		goto exit5;
 	/* target should not be an ancestor of source */
 	error = -ENOTEMPTY;
 	if (new_dentry == trap)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 6c28b61bb690..9250f4dd7d96 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -35,6 +35,8 @@
 #define SEEK_HOLE	4	/* seek to the next hole */
 #define SEEK_MAX	SEEK_HOLE
 
+#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+
 struct fstrim_range {
 	__u64 start;
 	__u64 len;
-- 
cgit v1.2.3


From da1ce0670c14d8380e423a3239e562a1dc15fa9e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 1 Apr 2014 17:08:43 +0200
Subject: vfs: add cross-rename

If flags contain RENAME_EXCHANGE then exchange source and destination files.
There's no restriction on the type of the files; e.g. a directory can be
exchanged with a symlink.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/dcache.c             |  50 ++++++++++++++++++-----
 fs/namei.c              | 104 +++++++++++++++++++++++++++++++++---------------
 include/linux/dcache.h  |   1 +
 include/uapi/linux/fs.h |   1 +
 security/security.c     |  16 ++++++++
 5 files changed, 131 insertions(+), 41 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index ca02c13a84aa..66cba5a8a346 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2483,12 +2483,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
 			dentry->d_name.name = dentry->d_iname;
 		} else {
 			/*
-			 * Both are internal.  Just copy target to dentry
+			 * Both are internal.
 			 */
-			memcpy(dentry->d_iname, target->d_name.name,
-					target->d_name.len + 1);
-			dentry->d_name.len = target->d_name.len;
-			return;
+			unsigned int i;
+			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
+				swap(((long *) &dentry->d_iname)[i],
+				     ((long *) &target->d_iname)[i]);
+			}
 		}
 	}
 	swap(dentry->d_name.len, target->d_name.len);
@@ -2545,13 +2547,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
  * __d_move - move a dentry
  * @dentry: entry to move
  * @target: new dentry
+ * @exchange: exchange the two dentries
  *
  * Update the dcache to reflect the move of a file name. Negative
  * dcache entries should not be moved in this way. Caller must hold
  * rename_lock, the i_mutex of the source and target directories,
  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
-static void __d_move(struct dentry * dentry, struct dentry * target)
+static void __d_move(struct dentry *dentry, struct dentry *target,
+		     bool exchange)
 {
 	if (!dentry->d_inode)
 		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
@@ -2573,8 +2577,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 	__d_drop(dentry);
 	__d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
 
-	/* Unhash the target: dput() will then get rid of it */
+	/*
+	 * Unhash the target (d_delete() is not usable here).  If exchanging
+	 * the two dentries, then rehash onto the other's hash queue.
+	 */
 	__d_drop(target);
+	if (exchange) {
+		__d_rehash(target,
+			   d_hash(dentry->d_parent, dentry->d_name.hash));
+	}
 
 	list_del(&dentry->d_u.d_child);
 	list_del(&target->d_u.d_child);
@@ -2601,6 +2612,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 	write_seqcount_end(&dentry->d_seq);
 
 	dentry_unlock_parents_for_move(dentry, target);
+	if (exchange)
+		fsnotify_d_move(target);
 	spin_unlock(&target->d_lock);
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -2618,11 +2631,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 void d_move(struct dentry *dentry, struct dentry *target)
 {
 	write_seqlock(&rename_lock);
-	__d_move(dentry, target);
+	__d_move(dentry, target, false);
 	write_sequnlock(&rename_lock);
 }
 EXPORT_SYMBOL(d_move);
 
+/*
+ * d_exchange - exchange two dentries
+ * @dentry1: first dentry
+ * @dentry2: second dentry
+ */
+void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
+{
+	write_seqlock(&rename_lock);
+
+	WARN_ON(!dentry1->d_inode);
+	WARN_ON(!dentry2->d_inode);
+	WARN_ON(IS_ROOT(dentry1));
+	WARN_ON(IS_ROOT(dentry2));
+
+	__d_move(dentry1, dentry2, true);
+
+	write_sequnlock(&rename_lock);
+}
+
 /**
  * d_ancestor - search for an ancestor
  * @p1: ancestor dentry
@@ -2670,7 +2702,7 @@ static struct dentry *__d_unalias(struct inode *inode,
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
 	if (likely(!d_mountpoint(alias))) {
-		__d_move(alias, dentry);
+		__d_move(alias, dentry, false);
 		ret = alias;
 	}
 out_err:
diff --git a/fs/namei.c b/fs/namei.c
index 4096d589bb3f..c1178880f23c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4031,6 +4031,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	const unsigned char *old_name;
 	struct inode *source = old_dentry->d_inode;
 	struct inode *target = new_dentry->d_inode;
+	bool new_is_dir = false;
+	unsigned max_links = new_dir->i_sb->s_max_links;
 
 	if (source == target)
 		return 0;
@@ -4039,10 +4041,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		return error;
 
-	if (!target)
+	if (!target) {
 		error = may_create(new_dir, new_dentry);
-	else
-		error = may_delete(new_dir, new_dentry, is_dir);
+	} else {
+		new_is_dir = d_is_dir(new_dentry);
+
+		if (!(flags & RENAME_EXCHANGE))
+			error = may_delete(new_dir, new_dentry, is_dir);
+		else
+			error = may_delete(new_dir, new_dentry, new_is_dir);
+	}
 	if (error)
 		return error;
 
@@ -4056,10 +4064,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * If we are going to change the parent - check write permissions,
 	 * we'll need to flip '..'.
 	 */
-	if (is_dir && new_dir != old_dir) {
-		error = inode_permission(source, MAY_WRITE);
-		if (error)
-			return error;
+	if (new_dir != old_dir) {
+		if (is_dir) {
+			error = inode_permission(source, MAY_WRITE);
+			if (error)
+				return error;
+		}
+		if ((flags & RENAME_EXCHANGE) && new_is_dir) {
+			error = inode_permission(target, MAY_WRITE);
+			if (error)
+				return error;
+		}
 	}
 
 	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
@@ -4069,7 +4084,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 	dget(new_dentry);
-	if (!is_dir)
+	if (!is_dir || (flags & RENAME_EXCHANGE))
 		lock_two_nondirectories(source, target);
 	else if (target)
 		mutex_lock(&target->i_mutex);
@@ -4078,25 +4093,25 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
 		goto out;
 
-	if (is_dir) {
-		unsigned max_links = new_dir->i_sb->s_max_links;
-
+	if (max_links && new_dir != old_dir) {
 		error = -EMLINK;
-		if (max_links && !target && new_dir != old_dir &&
-		    new_dir->i_nlink >= max_links)
+		if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
 			goto out;
-
-		if (target)
-			shrink_dcache_parent(new_dentry);
-	} else {
+		if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
+		    old_dir->i_nlink >= max_links)
+			goto out;
+	}
+	if (is_dir && !(flags & RENAME_EXCHANGE) && target)
+		shrink_dcache_parent(new_dentry);
+	if (!is_dir) {
 		error = try_break_deleg(source, delegated_inode);
 		if (error)
 			goto out;
-		if (target) {
-			error = try_break_deleg(target, delegated_inode);
-			if (error)
-				goto out;
-		}
+	}
+	if (target && !new_is_dir) {
+		error = try_break_deleg(target, delegated_inode);
+		if (error)
+			goto out;
 	}
 	if (!flags) {
 		error = old_dir->i_op->rename(old_dir, old_dentry,
@@ -4108,22 +4123,31 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (error)
 		goto out;
 
-	if (target) {
+	if (!(flags & RENAME_EXCHANGE) && target) {
 		if (is_dir)
 			target->i_flags |= S_DEAD;
 		dont_mount(new_dentry);
 	}
-	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
-		d_move(old_dentry, new_dentry);
+	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
+		if (!(flags & RENAME_EXCHANGE))
+			d_move(old_dentry, new_dentry);
+		else
+			d_exchange(old_dentry, new_dentry);
+	}
 out:
-	if (!is_dir)
+	if (!is_dir || (flags & RENAME_EXCHANGE))
 		unlock_two_nondirectories(source, target);
 	else if (target)
 		mutex_unlock(&target->i_mutex);
 	dput(new_dentry);
-	if (!error)
+	if (!error) {
 		fsnotify_move(old_dir, new_dir, old_name, is_dir,
-			      target, old_dentry);
+			      !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
+		if (flags & RENAME_EXCHANGE) {
+			fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
+				      new_is_dir, NULL, new_dentry);
+		}
+	}
 	fsnotify_oldname_free(old_name);
 
 	return error;
@@ -4143,7 +4167,10 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags & ~RENAME_NOREPLACE)
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+		return -EINVAL;
+
+	if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
 		return -EINVAL;
 
 retry:
@@ -4180,7 +4207,8 @@ retry:
 
 	oldnd.flags &= ~LOOKUP_PARENT;
 	newnd.flags &= ~LOOKUP_PARENT;
-	newnd.flags |= LOOKUP_RENAME_TARGET;
+	if (!(flags & RENAME_EXCHANGE))
+		newnd.flags |= LOOKUP_RENAME_TARGET;
 
 retry_deleg:
 	trap = lock_rename(new_dir, old_dir);
@@ -4200,12 +4228,23 @@ retry_deleg:
 	error = -EEXIST;
 	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
 		goto exit5;
+	if (flags & RENAME_EXCHANGE) {
+		error = -ENOENT;
+		if (d_is_negative(new_dentry))
+			goto exit5;
+
+		if (!d_is_dir(new_dentry)) {
+			error = -ENOTDIR;
+			if (newnd.last.name[newnd.last.len])
+				goto exit5;
+		}
+	}
 	/* unless the source is a directory trailing slashes give -ENOTDIR */
 	if (!d_is_dir(old_dentry)) {
 		error = -ENOTDIR;
 		if (oldnd.last.name[oldnd.last.len])
 			goto exit5;
-		if (newnd.last.name[newnd.last.len])
+		if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len])
 			goto exit5;
 	}
 	/* source should not be ancestor of target */
@@ -4213,7 +4252,8 @@ retry_deleg:
 	if (old_dentry == trap)
 		goto exit5;
 	/* target should not be an ancestor of source */
-	error = -ENOTEMPTY;
+	if (!(flags & RENAME_EXCHANGE))
+		error = -ENOTEMPTY;
 	if (new_dentry == trap)
 		goto exit5;
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3b50cac7ccb3..3b9bfdb83ba6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -308,6 +308,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *);
 
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
+extern void d_exchange(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 
 /* appendix may either be NULL or be used for transname suffixes */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9250f4dd7d96..ca1a11bb4443 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -36,6 +36,7 @@
 #define SEEK_MAX	SEEK_HOLE
 
 #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
 
 struct fstrim_range {
 	__u64 start;
diff --git a/security/security.c b/security/security.c
index 284fbc99aa9d..8b774f362a3d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -439,6 +439,14 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
 	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
 		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
 		return 0;
+
+	if (flags & RENAME_EXCHANGE) {
+		int err = security_ops->path_rename(new_dir, new_dentry,
+						    old_dir, old_dentry);
+		if (err)
+			return err;
+	}
+
 	return security_ops->path_rename(old_dir, old_dentry, new_dir,
 					 new_dentry);
 }
@@ -531,6 +539,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
             (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
 		return 0;
+
+	if (flags & RENAME_EXCHANGE) {
+		int err = security_ops->inode_rename(new_dir, new_dentry,
+						     old_dir, old_dentry);
+		if (err)
+			return err;
+	}
+
 	return security_ops->inode_rename(old_dir, old_dentry,
 					   new_dir, new_dentry);
 }
-- 
cgit v1.2.3


From 4522643aa9630be17238edf1b4c0b690c5dd7f5d Mon Sep 17 00:00:00 2001
From: Petri Gynther <pgynther@google.com>
Date: Mon, 24 Mar 2014 13:50:01 -0700
Subject: HID: uhid: Add UHID_CREATE2 + UHID_INPUT2

UHID_CREATE2:
HID report descriptor data (rd_data) is an array in struct uhid_create2_req,
instead of a pointer. Enables use from languages that don't support pointers,
e.g. Python.

UHID_INPUT2:
Data array is the last field of struct uhid_input2_req. Enables userspace to
write only the required bytes to kernel (ev.type + ev.u.input2.size + the part
of the data array that matters), instead of the entire struct uhid_input2_req.

Note:
UHID_CREATE2 increases the total size of struct uhid_event slightly, thus
increasing the size of messages that are queued for userspace. However, this
won't affect the userspace processing of these events.

[Jiri Kosina <jkosina@suse.cz>: adjust to hid_get_raw_report() and
				hid_output_raw_report() API changes]

Signed-off-by: Petri Gynther <pgynther@google.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/hid/uhid.txt | 11 +++++++
 drivers/hid/uhid.c         | 78 ++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/uhid.h  | 23 ++++++++++++++
 3 files changed, 112 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/hid/uhid.txt b/Documentation/hid/uhid.txt
index dc35a2b75eee..ee6593608c8e 100644
--- a/Documentation/hid/uhid.txt
+++ b/Documentation/hid/uhid.txt
@@ -93,6 +93,11 @@ the request was handled successfully.
   event to the kernel. The payload is of type struct uhid_create_req and
   contains information about your device. You can start I/O now.
 
+  UHID_CREATE2:
+  Same as UHID_CREATE, but the HID report descriptor data (rd_data) is an array
+  inside struct uhid_create2_req, instead of a pointer to a separate array.
+  Enables use from languages that don't support pointers, e.g. Python.
+
   UHID_DESTROY:
   This destroys the internal HID device. No further I/O will be accepted. There
   may still be pending messages that you can receive with read() but no further
@@ -105,6 +110,12 @@ the request was handled successfully.
   contains a data-payload. This is the raw data that you read from your device.
   The kernel will parse the HID reports and react on it.
 
+  UHID_INPUT2:
+  Same as UHID_INPUT, but the data array is the last field of uhid_input2_req.
+  Enables userspace to write only the required bytes to kernel (ev.type +
+  ev.u.input2.size + the part of the data array that matters), instead of
+  the entire struct uhid_input2_req.
+
   UHID_FEATURE_ANSWER:
   If you receive a UHID_FEATURE request you must answer with this request. You
   must copy the "id" field from the request into the answer. Set the "err" field
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 7ed79be2686a..0d078c32db4f 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -428,6 +428,67 @@ err_free:
 	return ret;
 }
 
+static int uhid_dev_create2(struct uhid_device *uhid,
+			    const struct uhid_event *ev)
+{
+	struct hid_device *hid;
+	int ret;
+
+	if (uhid->running)
+		return -EALREADY;
+
+	uhid->rd_size = ev->u.create2.rd_size;
+	if (uhid->rd_size <= 0 || uhid->rd_size > HID_MAX_DESCRIPTOR_SIZE)
+		return -EINVAL;
+
+	uhid->rd_data = kmalloc(uhid->rd_size, GFP_KERNEL);
+	if (!uhid->rd_data)
+		return -ENOMEM;
+
+	memcpy(uhid->rd_data, ev->u.create2.rd_data, uhid->rd_size);
+
+	hid = hid_allocate_device();
+	if (IS_ERR(hid)) {
+		ret = PTR_ERR(hid);
+		goto err_free;
+	}
+
+	strncpy(hid->name, ev->u.create2.name, 127);
+	hid->name[127] = 0;
+	strncpy(hid->phys, ev->u.create2.phys, 63);
+	hid->phys[63] = 0;
+	strncpy(hid->uniq, ev->u.create2.uniq, 63);
+	hid->uniq[63] = 0;
+
+	hid->ll_driver = &uhid_hid_driver;
+	hid->bus = ev->u.create2.bus;
+	hid->vendor = ev->u.create2.vendor;
+	hid->product = ev->u.create2.product;
+	hid->version = ev->u.create2.version;
+	hid->country = ev->u.create2.country;
+	hid->driver_data = uhid;
+	hid->dev.parent = uhid_misc.this_device;
+
+	uhid->hid = hid;
+	uhid->running = true;
+
+	ret = hid_add_device(hid);
+	if (ret) {
+		hid_err(hid, "Cannot register HID device\n");
+		goto err_hid;
+	}
+
+	return 0;
+
+err_hid:
+	hid_destroy_device(hid);
+	uhid->hid = NULL;
+	uhid->running = false;
+err_free:
+	kfree(uhid->rd_data);
+	return ret;
+}
+
 static int uhid_dev_destroy(struct uhid_device *uhid)
 {
 	if (!uhid->running)
@@ -456,6 +517,17 @@ static int uhid_dev_input(struct uhid_device *uhid, struct uhid_event *ev)
 	return 0;
 }
 
+static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
+{
+	if (!uhid->running)
+		return -EINVAL;
+
+	hid_input_report(uhid->hid, HID_INPUT_REPORT, ev->u.input2.data,
+			 min_t(size_t, ev->u.input2.size, UHID_DATA_MAX), 0);
+
+	return 0;
+}
+
 static int uhid_dev_feature_answer(struct uhid_device *uhid,
 				   struct uhid_event *ev)
 {
@@ -592,12 +664,18 @@ static ssize_t uhid_char_write(struct file *file, const char __user *buffer,
 	case UHID_CREATE:
 		ret = uhid_dev_create(uhid, &uhid->input_buf);
 		break;
+	case UHID_CREATE2:
+		ret = uhid_dev_create2(uhid, &uhid->input_buf);
+		break;
 	case UHID_DESTROY:
 		ret = uhid_dev_destroy(uhid);
 		break;
 	case UHID_INPUT:
 		ret = uhid_dev_input(uhid, &uhid->input_buf);
 		break;
+	case UHID_INPUT2:
+		ret = uhid_dev_input2(uhid, &uhid->input_buf);
+		break;
 	case UHID_FEATURE_ANSWER:
 		ret = uhid_dev_feature_answer(uhid, &uhid->input_buf);
 		break;
diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h
index 414b74be4da1..1e3b09c191cd 100644
--- a/include/uapi/linux/uhid.h
+++ b/include/uapi/linux/uhid.h
@@ -21,6 +21,7 @@
 
 #include <linux/input.h>
 #include <linux/types.h>
+#include <linux/hid.h>
 
 enum uhid_event_type {
 	UHID_CREATE,
@@ -34,6 +35,8 @@ enum uhid_event_type {
 	UHID_INPUT,
 	UHID_FEATURE,
 	UHID_FEATURE_ANSWER,
+	UHID_CREATE2,
+	UHID_INPUT2,
 };
 
 struct uhid_create_req {
@@ -50,6 +53,19 @@ struct uhid_create_req {
 	__u32 country;
 } __attribute__((__packed__));
 
+struct uhid_create2_req {
+	__u8 name[128];
+	__u8 phys[64];
+	__u8 uniq[64];
+	__u16 rd_size;
+	__u16 bus;
+	__u32 vendor;
+	__u32 product;
+	__u32 version;
+	__u32 country;
+	__u8 rd_data[HID_MAX_DESCRIPTOR_SIZE];
+} __attribute__((__packed__));
+
 #define UHID_DATA_MAX 4096
 
 enum uhid_report_type {
@@ -63,6 +79,11 @@ struct uhid_input_req {
 	__u16 size;
 } __attribute__((__packed__));
 
+struct uhid_input2_req {
+	__u16 size;
+	__u8 data[UHID_DATA_MAX];
+} __attribute__((__packed__));
+
 struct uhid_output_req {
 	__u8 data[UHID_DATA_MAX];
 	__u16 size;
@@ -100,6 +121,8 @@ struct uhid_event {
 		struct uhid_output_ev_req output_ev;
 		struct uhid_feature_req feature;
 		struct uhid_feature_answer_req feature_answer;
+		struct uhid_create2_req create2;
+		struct uhid_input2_req input2;
 	} u;
 } __attribute__((__packed__));
 
-- 
cgit v1.2.3


From 4d99ff8f12eb20c6cde292f185cb1c8c334ba0ed Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 10 Oct 2013 17:12:18 +0400
Subject: fuse: Turn writeback cache on

Introduce a bit that the kernel and userspace exchange with each other at
the init stage, and turn writeback on if userspace wants this and the
mount option 'allow_wbcache' is present (controlled by fusermount).

Also add each writable file to the per-inode write list and call
generic_file_aio_write() to make use of the Linux page cache engine.

Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c            | 11 +++++++++++
 fs/fuse/inode.c           |  5 ++++-
 include/uapi/linux/fuse.h |  7 ++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 276433021561..d03a35d3197e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -224,6 +224,8 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		spin_unlock(&fc->lock);
 		fuse_invalidate_attr(inode);
 	}
+	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+		fuse_link_write_file(file);
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@@ -1197,6 +1199,15 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	struct iov_iter i;
 	loff_t endbyte = 0;
 
+	if (get_fuse_conn(inode)->writeback_cache) {
+		/* Update size (EOF optimization) and mode (SUID clearing) */
+		err = fuse_update_attributes(mapping->host, NULL, file, NULL);
+		if (err)
+			return err;
+
+		return generic_file_aio_write(iocb, iov, nr_segs, pos);
+	}
+
 	WARN_ON(iocb->ki_pos != pos);
 
 	ocount = 0;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1061b0d9b86d..9ba191917415 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -887,6 +887,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 			}
 			if (arg->flags & FUSE_ASYNC_DIO)
 				fc->async_dio = 1;
+			if (arg->flags & FUSE_WRITEBACK_CACHE)
+				fc->writeback_cache = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
@@ -914,7 +916,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
 		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
-		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO;
+		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
+		FUSE_WRITEBACK_CACHE;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 60bb2f9f7b74..cf4750e1bb49 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -93,6 +93,9 @@
  *
  * 7.22
  *  - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ *  - add FUSE_WRITEBACK_CACHE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -128,7 +131,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 22
+#define FUSE_KERNEL_MINOR_VERSION 23
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -219,6 +222,7 @@ struct fuse_file_lock {
  * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
  * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
  * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -236,6 +240,7 @@ struct fuse_file_lock {
 #define FUSE_DO_READDIRPLUS	(1 << 13)
 #define FUSE_READDIRPLUS_AUTO	(1 << 14)
 #define FUSE_ASYNC_DIO		(1 << 15)
+#define FUSE_WRITEBACK_CACHE	(1 << 16)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From ab9705f34ce92b4f0da494859e69688e1e34e387 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 2 Apr 2014 16:21:24 -0400
Subject: audit: renumber AUDIT_FEATURE_CHANGE into the 1300 range

1000-1099 is for configuring things.  So auditd ignored such messages.
This is about actually logging what was configured.  Move it into the
range for such types of messages.

Reported-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/uapi/linux/audit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4315ee99b967..11917f747cb4 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -70,7 +70,6 @@
 #define AUDIT_TTY_SET		1017	/* Set TTY auditing status */
 #define AUDIT_SET_FEATURE	1018	/* Turn an audit feature on or off */
 #define AUDIT_GET_FEATURE	1019	/* Get which features are enabled */
-#define AUDIT_FEATURE_CHANGE	1020	/* audit log listing feature changes */
 
 #define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC		1107	/* We filter this differently */
@@ -110,6 +109,7 @@
 #define AUDIT_NETFILTER_CFG	1325	/* Netfilter chain modifications */
 #define AUDIT_SECCOMP		1326	/* Secure Computing event */
 #define AUDIT_PROCTITLE		1327	/* Proctitle emit event */
+#define AUDIT_FEATURE_CHANGE	1328	/* audit log listing feature changes */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
-- 
cgit v1.2.3


From ea1a8217b06b41b31a2b60b0b83f75c77ef9c873 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn@ubuntu.com>
Date: Thu, 3 Apr 2014 14:48:33 -0700
Subject: xattr: guard against simultaneous glibc header inclusion

If the glibc xattr.h header is included after the uapi header,
compilation fails due to an enum re-using a #define from the uapi
header.

Protect against this by guarding the define and enum inclusions against
each other.

(See https://lists.debian.org/debian-glibc/2014/03/msg00029.html
and https://sourceware.org/glibc/wiki/Synchronizing_Headers
for more information.)

Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Allan McRae <allan@archlinux.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/libc-compat.h | 9 +++++++++
 include/uapi/linux/xattr.h       | 7 +++++++
 2 files changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 335e8a7cad39..c140620dad92 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -85,6 +85,12 @@
 
 #endif /* _NETINET_IN_H */
 
+/* Definitions for xattr.h */
+#if defined(_SYS_XATTR_H)
+#define __UAPI_DEF_XATTR		0
+#else
+#define __UAPI_DEF_XATTR		1
+#endif
 
 /* If we did not see any headers from any supported C libraries,
  * or we are being included in the kernel, then define everything
@@ -98,6 +104,9 @@
 #define __UAPI_DEF_IPV6_MREQ		1
 #define __UAPI_DEF_IPPROTO_V6		1
 
+/* Definitions for xattr.h */
+#define __UAPI_DEF_XATTR		1
+
 #endif /* __GLIBC__ */
 
 #endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index 40bbc04b6f81..c38355c1f3c9 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -7,11 +7,18 @@
   Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
 */
+
+#include <linux/libc-compat.h>
+
 #ifndef _UAPI_LINUX_XATTR_H
 #define _UAPI_LINUX_XATTR_H
 
+#ifdef __UAPI_DEF_XATTR
+#define __USE_KERNEL_XATTR_DEFS
+
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
 #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
+#endif
 
 /* Namespaces */
 #define XATTR_OS2_PREFIX "os2."
-- 
cgit v1.2.3


From 773bcf8c9ce04c62c513182620efb729c97452fc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <m.chehab@samsung.com>
Date: Sun, 6 Apr 2014 10:48:31 -0300
Subject: [media] v4l2-common: fix warning when used on userspace

As reported by Linus, make headers_check is reporting:
   usr/include/linux/v4l2-common.h:72: found __[us]{8,16,32,64} type without #include <linux/types.h>

   which seems to have come in through commits 777f4f85b75f1 and
   254a47770163f.

That happens because struct v4l2_edid should be visible to both
subdev and V4L2 APIs. So, it was moved to v4l2-common.h.

As Linus pointed out, the proper fix is to just add an include for
linux/types.h to v4l2-common.h.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/v4l2-common.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-common.h b/include/uapi/linux/v4l2-common.h
index 270db8914c01..9bf508ad0957 100644
--- a/include/uapi/linux/v4l2-common.h
+++ b/include/uapi/linux/v4l2-common.h
@@ -29,6 +29,8 @@
 #ifndef __V4L2_COMMON__
 #define __V4L2_COMMON__
 
+#include <linux/types.h>
+
 /*
  *
  * Selection interface definitions
-- 
cgit v1.2.3


From a0715cc22601e8830ace98366c0c2bd8da52af52 Mon Sep 17 00:00:00 2001
From: Alex Thorlton <athorlton@sgi.com>
Date: Mon, 7 Apr 2014 15:37:10 -0700
Subject: mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE

Add VM_INIT_DEF_MASK, to allow us to set the default flags for VMs.  It
also adds a prctl control which allows us to set the THP disable bit in
mm->def_flags so that VMs will pick up the setting as they are created.

Signed-off-by: Alex Thorlton <athorlton@sgi.com>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h         |  3 +++
 include/uapi/linux/prctl.h |  3 +++
 kernel/fork.c              | 11 ++++++++---
 kernel/sys.c               | 15 +++++++++++++++
 4 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 35300f390eb6..c270fa68a32b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -177,6 +177,9 @@ extern unsigned int kobjsize(const void *objp);
  */
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 
+/* This mask defines which mm->def_flags a process can inherit its parent */
+#define VM_INIT_DEF_MASK	VM_NOHUGEPAGE
+
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 289760f424aa..58afc04c107e 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -149,4 +149,7 @@
 
 #define PR_GET_TID_ADDRESS	40
 
+#define PR_SET_THP_DISABLE	41
+#define PR_GET_THP_DISABLE	42
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index abc45890f0a5..e40c0a01d5a6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -530,8 +530,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	atomic_set(&mm->mm_count, 1);
 	init_rwsem(&mm->mmap_sem);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->flags = (current->mm) ?
-		(current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
 	mm->core_state = NULL;
 	atomic_long_set(&mm->nr_ptes, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
@@ -540,8 +538,15 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	mm_init_owner(mm, p);
 	clear_tlb_flush_pending(mm);
 
-	if (likely(!mm_alloc_pgd(mm))) {
+	if (current->mm) {
+		mm->flags = current->mm->flags & MMF_INIT_MASK;
+		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
+	} else {
+		mm->flags = default_dump_filter;
 		mm->def_flags = 0;
+	}
+
+	if (likely(!mm_alloc_pgd(mm))) {
 		mmu_notifier_mm_init(mm);
 		return mm;
 	}
diff --git a/kernel/sys.c b/kernel/sys.c
index adaeab6f7a87..fba0f29401ea 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1996,6 +1996,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
 		return current->no_new_privs ? 1 : 0;
+	case PR_GET_THP_DISABLE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+		break;
+	case PR_SET_THP_DISABLE:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		down_write(&me->mm->mmap_sem);
+		if (arg2)
+			me->mm->def_flags |= VM_NOHUGEPAGE;
+		else
+			me->mm->def_flags &= ~VM_NOHUGEPAGE;
+		up_write(&me->mm->mmap_sem);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit v1.2.3


From edd10d33283899fb15d99a290dcc9ceb3604ca78 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 3 Apr 2014 16:45:23 -0600
Subject: NVMe: Retry failed commands with non-fatal errors

For commands returned with failed status, queue these for resubmission
and continue retrying them until success or for a limited amount of
time. The final timeout was arbitrarily chosen so requests can't be
retried indefinitely.

Since these are requeued on the nvmeq that submitted the command, the
callbacks have to take an nvmeq instead of an nvme_dev as a parameter
so that we can use the locked queue to append the iod to retry later.

The nvme_iod conveniently can be used to track how long we've been trying
to successfully complete an iod request. The nvme_iod also provides the
nvme prp dma mappings, so I had to move a few things around so we can
keep those mappings.

Signed-off-by: Keith Busch <keith.busch@intel.com>
[fixed checkpatch issue with long line]
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 235 ++++++++++++++++++++++++++++------------------
 drivers/block/nvme-scsi.c |  10 +-
 include/linux/nvme.h      |   4 +-
 include/uapi/linux/nvme.h |   1 +
 4 files changed, 154 insertions(+), 96 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 596e2abd7971..efa9c8f4a7a7 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -50,6 +50,7 @@
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 #define ADMIN_TIMEOUT	(60 * HZ)
+#define IOD_TIMEOUT	(4 * NVME_IO_TIMEOUT)
 
 unsigned char io_timeout = 30;
 module_param(io_timeout, byte, 0644);
@@ -94,6 +95,7 @@ struct nvme_queue {
 	wait_queue_head_t sq_full;
 	wait_queue_t sq_cong_wait;
 	struct bio_list sq_cong;
+	struct list_head iod_bio;
 	u32 __iomem *q_db;
 	u16 q_depth;
 	u16 cq_vector;
@@ -128,7 +130,7 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 }
 
-typedef void (*nvme_completion_fn)(struct nvme_dev *, void *,
+typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
 						struct nvme_completion *);
 
 struct nvme_cmd_info {
@@ -200,7 +202,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
 #define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
 
-static void special_completion(struct nvme_dev *dev, void *ctx,
+static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
@@ -208,26 +210,26 @@ static void special_completion(struct nvme_dev *dev, void *ctx,
 	if (ctx == CMD_CTX_FLUSH)
 		return;
 	if (ctx == CMD_CTX_ABORT) {
-		++dev->abort_limit;
+		++nvmeq->dev->abort_limit;
 		return;
 	}
 	if (ctx == CMD_CTX_COMPLETED) {
-		dev_warn(&dev->pci_dev->dev,
+		dev_warn(nvmeq->q_dmadev,
 				"completed id %d twice on queue %d\n",
 				cqe->command_id, le16_to_cpup(&cqe->sq_id));
 		return;
 	}
 	if (ctx == CMD_CTX_INVALID) {
-		dev_warn(&dev->pci_dev->dev,
+		dev_warn(nvmeq->q_dmadev,
 				"invalid id %d completed on queue %d\n",
 				cqe->command_id, le16_to_cpup(&cqe->sq_id));
 		return;
 	}
 
-	dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx);
+	dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
 }
 
-static void async_completion(struct nvme_dev *dev, void *ctx,
+static void async_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	struct async_cmd_info *cmdinfo = ctx;
@@ -357,6 +359,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
 		iod->npages = -1;
 		iod->length = nbytes;
 		iod->nents = 0;
+		iod->first_dma = 0ULL;
 		iod->start_time = jiffies;
 	}
 
@@ -405,19 +408,31 @@ static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
 	part_stat_unlock();
 }
 
-static void bio_completion(struct nvme_dev *dev, void *ctx,
+static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	struct nvme_iod *iod = ctx;
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
+	if (unlikely(status)) {
+		if (!(status & NVME_SC_DNR ||
+				bio->bi_rw & REQ_FAILFAST_MASK) &&
+				(jiffies - iod->start_time) < IOD_TIMEOUT) {
+			if (!waitqueue_active(&nvmeq->sq_full))
+				add_wait_queue(&nvmeq->sq_full,
+							&nvmeq->sq_cong_wait);
+			list_add_tail(&iod->node, &nvmeq->iod_bio);
+			wake_up(&nvmeq->sq_full);
+			return;
+		}
+	}
 	if (iod->nents) {
-		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
+		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
 			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		nvme_end_io_acct(bio, iod->start_time);
 	}
-	nvme_free_iod(dev, iod);
+	nvme_free_iod(nvmeq->dev, iod);
 	if (status)
 		bio_endio(bio, -EIO);
 	else
@@ -425,8 +440,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
-			struct nvme_iod *iod, int total_len, gfp_t gfp)
+int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
+								gfp_t gfp)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -439,7 +454,6 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	dma_addr_t prp_dma;
 	int nprps, i;
 
-	cmd->prp1 = cpu_to_le64(dma_addr);
 	length -= (PAGE_SIZE - offset);
 	if (length <= 0)
 		return total_len;
@@ -454,7 +468,7 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	}
 
 	if (length <= PAGE_SIZE) {
-		cmd->prp2 = cpu_to_le64(dma_addr);
+		iod->first_dma = dma_addr;
 		return total_len;
 	}
 
@@ -469,13 +483,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 
 	prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
 	if (!prp_list) {
-		cmd->prp2 = cpu_to_le64(dma_addr);
+		iod->first_dma = dma_addr;
 		iod->npages = -1;
 		return (total_len - length) + PAGE_SIZE;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
-	cmd->prp2 = cpu_to_le64(prp_dma);
 	i = 0;
 	for (;;) {
 		if (i == PAGE_SIZE / 8) {
@@ -514,10 +527,11 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
 
 	bio_chain(split, bio);
 
-	if (bio_list_empty(&nvmeq->sq_cong))
+	if (!waitqueue_active(&nvmeq->sq_full))
 		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
 	bio_list_add(&nvmeq->sq_cong, split);
 	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up(&nvmeq->sq_full);
 
 	return 0;
 }
@@ -570,25 +584,13 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	return length;
 }
 
-/*
- * We reuse the small pool to allocate the 16-byte range here as it is not
- * worth having a special pool for these or additional cases to handle freeing
- * the iod.
- */
 static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		struct bio *bio, struct nvme_iod *iod, int cmdid)
 {
-	struct nvme_dsm_range *range;
+	struct nvme_dsm_range *range =
+				(struct nvme_dsm_range *)iod_list(iod)[0];
 	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
 
-	range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
-							&iod->first_dma);
-	if (!range)
-		return -ENOMEM;
-
-	iod_list(iod)[0] = (__le64 *)range;
-	iod->npages = 0;
-
 	range->cattr = cpu_to_le32(0);
 	range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
 	range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
@@ -635,44 +637,22 @@ int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
 	return nvme_submit_flush(nvmeq, ns, cmdid);
 }
 
-/*
- * Called with local interrupts disabled and the q_lock held.  May not sleep.
- */
-static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-								struct bio *bio)
+static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
+	struct bio *bio = iod->private;
+	struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
 	struct nvme_command *cmnd;
-	struct nvme_iod *iod;
-	enum dma_data_direction dma_dir;
-	int cmdid, length, result;
+	int cmdid;
 	u16 control;
 	u32 dsmgmt;
-	int psegs = bio_phys_segments(ns->queue, bio);
-
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
-
-	result = -ENOMEM;
-	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
-	if (!iod)
-		goto nomem;
-	iod->private = bio;
 
-	result = -EBUSY;
 	cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT);
 	if (unlikely(cmdid < 0))
-		goto free_iod;
+		return cmdid;
 
-	if (bio->bi_rw & REQ_DISCARD) {
-		result = nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-		if (result)
-			goto free_cmdid;
-		return result;
-	}
-	if ((bio->bi_rw & REQ_FLUSH) && !psegs)
+	if (bio->bi_rw & REQ_DISCARD)
+		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
+	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -686,42 +666,85 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
 
 	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
-
 	memset(cmnd, 0, sizeof(*cmnd));
-	if (bio_data_dir(bio)) {
-		cmnd->rw.opcode = nvme_cmd_write;
-		dma_dir = DMA_TO_DEVICE;
-	} else {
-		cmnd->rw.opcode = nvme_cmd_read;
-		dma_dir = DMA_FROM_DEVICE;
-	}
-
-	result = nvme_map_bio(nvmeq, iod, bio, dma_dir, psegs);
-	if (result <= 0)
-		goto free_cmdid;
-	length = result;
 
+	cmnd->rw.opcode = bio_data_dir(bio) ? nvme_cmd_write : nvme_cmd_read;
 	cmnd->rw.command_id = cmdid;
 	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-	length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
-								GFP_ATOMIC);
+	cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
-	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
+	cmnd->rw.length =
+		cpu_to_le16((bio->bi_iter.bi_size >> ns->lba_shift) - 1);
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
-	nvme_start_io_acct(bio);
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
 	writel(nvmeq->sq_tail, nvmeq->q_db);
 
 	return 0;
+}
+
+/*
+ * Called with local interrupts disabled and the q_lock held.  May not sleep.
+ */
+static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+								struct bio *bio)
+{
+	struct nvme_iod *iod;
+	int psegs = bio_phys_segments(ns->queue, bio);
+	int result;
+
+	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
+		result = nvme_submit_flush_data(nvmeq, ns);
+		if (result)
+			return result;
+	}
+
+	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
+	if (!iod)
+		return -ENOMEM;
+
+	iod->private = bio;
+	if (bio->bi_rw & REQ_DISCARD) {
+		void *range;
+		/*
+		 * We reuse the small pool to allocate the 16-byte range here
+		 * as it is not worth having a special pool for these or
+		 * additional cases to handle freeing the iod.
+		 */
+		range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
+						GFP_ATOMIC,
+						&iod->first_dma);
+		if (!range) {
+			result = -ENOMEM;
+			goto free_iod;
+		}
+		iod_list(iod)[0] = (__le64 *)range;
+		iod->npages = 0;
+	} else if (psegs) {
+		result = nvme_map_bio(nvmeq, iod, bio,
+			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
+			psegs);
+		if (result <= 0)
+			goto free_iod;
+		if (nvme_setup_prps(nvmeq->dev, iod, result, GFP_ATOMIC) !=
+								result) {
+			result = -ENOMEM;
+			goto free_iod;
+		}
+		nvme_start_io_acct(bio);
+	}
+	if (unlikely(nvme_submit_iod(nvmeq, iod))) {
+		if (!waitqueue_active(&nvmeq->sq_full))
+			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+		list_add_tail(&iod->node, &nvmeq->iod_bio);
+	}
+	return 0;
 
- free_cmdid:
-	free_cmdid(nvmeq, cmdid, NULL);
  free_iod:
 	nvme_free_iod(nvmeq->dev, iod);
- nomem:
 	return result;
 }
 
@@ -745,7 +768,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
 		}
 
 		ctx = free_cmdid(nvmeq, cqe.command_id, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
+		fn(nvmeq, ctx, &cqe);
 	}
 
 	/* If the controller ignores the cq head doorbell and continuously
@@ -781,7 +804,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
 	if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
 		result = nvme_submit_bio_queue(nvmeq, ns, bio);
 	if (unlikely(result)) {
-		if (bio_list_empty(&nvmeq->sq_cong))
+		if (!waitqueue_active(&nvmeq->sq_full))
 			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
 		bio_list_add(&nvmeq->sq_cong, bio);
 	}
@@ -825,7 +848,7 @@ struct sync_cmd_info {
 	int status;
 };
 
-static void sync_completion(struct nvme_dev *dev, void *ctx,
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	struct sync_cmd_info *cmdinfo = ctx;
@@ -1112,7 +1135,7 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid,
 								nvmeq->qid);
 		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
+		fn(nvmeq, ctx, &cqe);
 	}
 }
 
@@ -1125,6 +1148,17 @@ static void nvme_free_queue(struct rcu_head *r)
 		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
 		bio_endio(bio, -EIO);
 	}
+	while (!list_empty(&nvmeq->iod_bio)) {
+		static struct nvme_completion cqe = {
+			.status = cpu_to_le16(
+				(NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1),
+		};
+		struct nvme_iod *iod = list_first_entry(&nvmeq->iod_bio,
+							struct nvme_iod,
+							node);
+		list_del(&iod->node);
+		bio_completion(nvmeq, iod, &cqe);
+	}
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
@@ -1232,6 +1266,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	init_waitqueue_head(&nvmeq->sq_full);
 	init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
 	bio_list_init(&nvmeq->sq_cong);
+	INIT_LIST_HEAD(&nvmeq->iod_bio);
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
@@ -1565,7 +1600,9 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		c.rw.metadata = cpu_to_le64(meta_dma_addr);
 	}
 
-	length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
+	length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 
 	if (length != (io.nblocks + 1) << ns->lba_shift)
 		status = -ENOMEM;
@@ -1635,8 +1672,9 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
 								length);
 		if (IS_ERR(iod))
 			return PTR_ERR(iod);
-		length = nvme_setup_prps(dev, &c.common, iod, length,
-								GFP_KERNEL);
+		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.common.prp2 = cpu_to_le64(iod->first_dma);
 	}
 
 	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
@@ -1733,17 +1771,33 @@ static const struct block_device_operations nvme_fops = {
 	.getgeo		= nvme_getgeo,
 };
 
+static void nvme_resubmit_iods(struct nvme_queue *nvmeq)
+{
+	struct nvme_iod *iod, *next;
+
+	list_for_each_entry_safe(iod, next, &nvmeq->iod_bio, node) {
+		if (unlikely(nvme_submit_iod(nvmeq, iod)))
+			break;
+		list_del(&iod->node);
+		if (bio_list_empty(&nvmeq->sq_cong) &&
+						list_empty(&nvmeq->iod_bio))
+			remove_wait_queue(&nvmeq->sq_full,
+						&nvmeq->sq_cong_wait);
+	}
+}
+
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
 		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
 		struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
 
-		if (bio_list_empty(&nvmeq->sq_cong))
+		if (bio_list_empty(&nvmeq->sq_cong) &&
+						list_empty(&nvmeq->iod_bio))
 			remove_wait_queue(&nvmeq->sq_full,
 							&nvmeq->sq_cong_wait);
 		if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
-			if (bio_list_empty(&nvmeq->sq_cong))
+			if (!waitqueue_active(&nvmeq->sq_full))
 				add_wait_queue(&nvmeq->sq_full,
 							&nvmeq->sq_cong_wait);
 			bio_list_add_head(&nvmeq->sq_cong, bio);
@@ -1785,6 +1839,7 @@ static int nvme_kthread(void *data)
 				nvme_process_cq(nvmeq);
 				nvme_cancel_ios(nvmeq, true);
 				nvme_resubmit_bios(nvmeq);
+				nvme_resubmit_iods(nvmeq);
  unlock:
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 111c920c1574..2c3f5be06da1 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1562,13 +1562,14 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = PTR_ERR(iod);
 			goto out;
 		}
-		length = nvme_setup_prps(dev, &c.common, iod, tot_len,
-								GFP_KERNEL);
+		length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
 		if (length != tot_len) {
 			res = -ENOMEM;
 			goto out_unmap;
 		}
 
+		c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
 		c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 		c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 	} else if (opcode == nvme_admin_activate_fw) {
@@ -2092,8 +2093,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = PTR_ERR(iod);
 			goto out;
 		}
-		retcode = nvme_setup_prps(dev, &c.common, iod, unit_len,
-							GFP_KERNEL);
+		retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
 		if (retcode != unit_len) {
 			nvme_unmap_user_pages(dev,
 				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2102,6 +2102,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = -ENOMEM;
 			goto out;
 		}
+		c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.rw.prp2 = cpu_to_le64(iod->first_dma);
 
 		nvme_offset += unit_num_blocks;
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1da0807c65bc..b95431d0338b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -136,6 +136,7 @@ struct nvme_iod {
 	int length;		/* Of data, in bytes */
 	unsigned long start_time;
 	dma_addr_t first_dma;
+	struct list_head node;
 	struct scatterlist sg[0];
 };
 
@@ -151,8 +152,7 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
  */
 void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
 
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
-			struct nvme_iod *iod, int total_len, gfp_t gfp);
+int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int , gfp_t);
 struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 				unsigned long addr, unsigned length);
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index e5ab62201119..096fe1c6f83d 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -434,6 +434,7 @@ enum {
 	NVME_SC_REFTAG_CHECK		= 0x284,
 	NVME_SC_COMPARE_FAILED		= 0x285,
 	NVME_SC_ACCESS_DENIED		= 0x286,
+	NVME_SC_DNR			= 0x4000,
 };
 
 struct nvme_completion {
-- 
cgit v1.2.3