From 735b9ffada0563afa52d0879cf6d630819ee68ad Mon Sep 17 00:00:00 2001
From: Damien Lespiau <damien.lespiau@intel.com>
Date: Mon, 9 Jun 2014 14:39:50 +0100
Subject: drm: Driver-specific ioctls range from 0x40 to 0x9f

DRM_COMMAND_END is 0xa0, so the last driver ioctl is 0x9f, not 0x99.

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 include/uapi/drm/drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 9abbeb924cbb..b0b855613641 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -780,7 +780,7 @@ struct drm_prime_handle {
 
 /**
  * Device specific ioctls should only be in their respective headers
- * The device specific ioctl range is from 0x40 to 0x99.
+ * The device specific ioctl range is from 0x40 to 0x9f.
  * Generic IOCTLS restart at 0xA0.
  *
  * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
-- 
cgit v1.2.3


From d15156138dad40205c9fdd9abe85c9e1479ae272 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Tue, 1 Jul 2014 10:48:05 -0600
Subject: block SG_IO: add SG_FLAG_Q_AT_HEAD flag

After the SG_IO ioctl was copied into the block layer and
later into the bsg driver, subtle differences emerged.

One difference is the way injected commands are queued through
the block layer (i.e. this is not SCSI device queueing nor SATA
NCQ). Summarizing:
  - SG_IO on block layer device: blk_exec*(at_head=false)
  - sg device SG_IO: at_head=true
  - bsg device SG_IO: at_head=true

Some time ago Boaz Harrosh introduced a sg v4 flag called
BSG_FLAG_Q_AT_TAIL to override the bsg driver default. A
recent patch titled: "sg: add SG_FLAG_Q_AT_TAIL flag"
allowed the sg driver default to be overridden. This patch
allows a SG_IO ioctl sent to a block layer device to have
its default overridden.

ChangeLog:
    - introduce SG_FLAG_Q_AT_HEAD flag in sg.h to cause
      commands that are injected via a block layer
      device SG_IO ioctl to set at_head=true
    - make comments clearer about queueing in sg.h since the
      header is used both by the sg device and block layer
      device implementations of the SG_IO ioctl.
    - introduce BSG_FLAG_Q_AT_HEAD in bsg.h for compatibility
      (it does nothing) and update comments.

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/scsi_ioctl.c       |  5 ++++-
 include/scsi/sg.h        |  3 +++
 include/uapi/linux/bsg.h | 11 ++++++-----
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index bda1497add4c..51bf5155ee75 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -290,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	unsigned long start_time;
 	ssize_t ret = 0;
 	int writing = 0;
+	int at_head = 0;
 	struct request *rq;
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	struct bio *bio;
@@ -313,6 +314,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		case SG_DXFER_FROM_DEV:
 			break;
 		}
+	if (hdr->flags & SG_FLAG_Q_AT_HEAD)
+		at_head = 1;
 
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
@@ -369,7 +372,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	 * (if he doesn't check that is his problem).
 	 * N.B. a non-zero SCSI status is _not_ necessarily an error.
 	 */
-	blk_execute_rq(q, bd_disk, rq, 0);
+	blk_execute_rq(q, bd_disk, rq, at_head);
 
 	hdr->duration = jiffies_to_msecs(jiffies - start_time);
 
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index a9f3c6fc3f57..4734c15ab5d6 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -129,6 +129,9 @@ typedef struct sg_io_hdr
 #define SG_FLAG_MMAP_IO 4       /* request memory mapped IO */
 #define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
 				/* user space (debug indirect IO) */
+/* defaults:: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */
+#define SG_FLAG_Q_AT_TAIL 0x10
+#define SG_FLAG_Q_AT_HEAD 0x20
 
 /* following 'info' values are "or"-ed together */
 #define SG_INFO_OK_MASK 0x1
diff --git a/include/uapi/linux/bsg.h b/include/uapi/linux/bsg.h
index 7a12e1c0f371..02986cf8b6f1 100644
--- a/include/uapi/linux/bsg.h
+++ b/include/uapi/linux/bsg.h
@@ -10,12 +10,13 @@
 #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT	2
 
 /*
- * For flags member below
- * sg.h sg_io_hdr also has bits defined for it's flags member. However
- * none of these bits are implemented/used by bsg. The bits below are
- * allocated to not conflict with sg.h ones anyway.
+ * For flag constants below:
+ * sg.h sg_io_hdr also has bits defined for it's flags member. These
+ * two flag values (0x10 and 0x20) have the same meaning in sg.h . For
+ * bsg the BSG_FLAG_Q_AT_HEAD flag is ignored since it is the deafult.
  */
-#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_HEAD 0x20
 
 struct sg_io_v4 {
 	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
-- 
cgit v1.2.3


From cb553215d5d277d4838d7d6b7722e964bcf5ca1f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Thu, 26 Jun 2014 17:41:47 +0800
Subject: include/uapi/linux/virtio_blk.h: introduce feature of VIRTIO_BLK_F_MQ

Current virtio-blk spec only supports one virtual queue for transfering
data between VM and host, and inside VM all kinds of operations on
the virtual queue needs to hold one lock, so cause below problems:

	- bad scalability
	- bad throughput

This patch requests to introduce feature of VIRTIO_BLK_F_MQ
so that more than one virtual queues can be used to virtio-blk
device, then above problems can be solved or eased.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/uapi/linux/virtio_blk.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 6d8e61c48563..9ad67b267584 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -40,6 +40,7 @@
 #define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
 
 #ifndef __KERNEL__
 /* Old (deprecated) name for VIRTIO_BLK_F_WCE. */
@@ -77,6 +78,10 @@ struct virtio_blk_config {
 
 	/* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
 	__u8 wce;
+	__u8 unused;
+
+	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+	__u16 num_queues;
 } __attribute__((packed));
 
 /*
-- 
cgit v1.2.3


From b4e05923f9c5bb65ac82988d7b53cfd7425e6f36 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 20 Jul 2014 13:35:44 -0700
Subject: Input: add support for Wacom protocol 4 serial tablets

Recent version of xf86-input-wacom no longer support directly accessing
serial tablets. Instead xf86-input-wacom now expects all wacom tablets to
be driven by the kernel and to show up as evdev devices.

This has caused old serial Wacom tablets to stop working for people who still
have such tablets. Julian Squires has written a serio input driver to fix this:
https://github.com/tokenrove/wacom-serial-iv

This is a cleaned up version of this driver with improved Graphire support
(I own an old Graphire myself).

Signed-off-by: Julian Squires <julian@cipht.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 MAINTAINERS                          |   7 +
 drivers/input/tablet/Kconfig         |  10 +
 drivers/input/tablet/Makefile        |   1 +
 drivers/input/tablet/wacom_serial4.c | 616 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/serio.h           |   1 +
 5 files changed, 635 insertions(+)
 create mode 100644 drivers/input/tablet/wacom_serial4.c

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 51ebb779c5f3..c68bd8b6f8e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9708,6 +9708,13 @@ M:	Pierre Ossman <pierre@ossman.eu>
 S:	Maintained
 F:	drivers/mmc/host/wbsd.*
 
+WACOM PROTOCOL 4 SERIAL TABLETS
+M:	Julian Squires <julian@cipht.net>
+M:	Hans de Goede <hdegoede@redhat.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/tablet/wacom_serial4.c
+
 WATCHDOG DEVICE DRIVERS
 M:	Wim Van Sebroeck <wim@iguana.be>
 L:	linux-watchdog@vger.kernel.org
diff --git a/drivers/input/tablet/Kconfig b/drivers/input/tablet/Kconfig
index bed7cbf84cfd..a5121b09c63f 100644
--- a/drivers/input/tablet/Kconfig
+++ b/drivers/input/tablet/Kconfig
@@ -89,4 +89,14 @@ config TABLET_USB_WACOM
 	  To compile this driver as a module, choose M here: the
 	  module will be called wacom.
 
+config TABLET_SERIAL_WACOM4
+	tristate "Wacom protocol 4 serial tablet support"
+	select SERIO
+	help
+	  Say Y here if you want to use Wacom protocol 4 serial tablets.
+	  E.g. serial versions of the Cintiq, Graphire or Penpartner.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called wacom_serial4.
+
 endif
diff --git a/drivers/input/tablet/Makefile b/drivers/input/tablet/Makefile
index 3f6c25220638..4d9339fb3b63 100644
--- a/drivers/input/tablet/Makefile
+++ b/drivers/input/tablet/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_TABLET_USB_GTCO)	+= gtco.o
 obj-$(CONFIG_TABLET_USB_HANWANG) += hanwang.o
 obj-$(CONFIG_TABLET_USB_KBTAB)	+= kbtab.o
 obj-$(CONFIG_TABLET_USB_WACOM)	+= wacom.o
+obj-$(CONFIG_TABLET_SERIAL_WACOM4) += wacom_serial4.o
diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c
new file mode 100644
index 000000000000..d3d251e27307
--- /dev/null
+++ b/drivers/input/tablet/wacom_serial4.c
@@ -0,0 +1,616 @@
+/*
+ * Wacom protocol 4 serial tablet driver
+ *
+ * Copyright 2014      Hans de Goede <hdegoede@redhat.com>
+ * Copyright 2011-2012 Julian Squires <julian@cipht.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version of 2 of the License, or (at your
+ * option) any later version. See the file COPYING in the main directory of
+ * this archive for more details.
+ *
+ * Many thanks to Bill Seremetis, without whom PenPartner support
+ * would not have been possible. Thanks to Patrick Mahoney.
+ *
+ * This driver was developed with reference to much code written by others,
+ * particularly:
+ *  - elo, gunze drivers by Vojtech Pavlik <vojtech@ucw.cz>;
+ *  - wacom_w8001 driver by Jaya Kumar <jayakumar.lkml@gmail.com>;
+ *  - the USB wacom input driver, credited to many people
+ *    (see drivers/input/tablet/wacom.h);
+ *  - new and old versions of linuxwacom / xf86-input-wacom credited to
+ *    Frederic Lepied, France. <Lepied@XFree86.org> and
+ *    Ping Cheng, Wacom. <pingc@wacom.com>;
+ *  - and xf86wacom.c (a presumably ancient version of the linuxwacom code),
+ *    by Frederic Lepied and Raph Levien <raph@gtk.org>.
+ *
+ * To do:
+ *  - support pad buttons; (requires access to a model with pad buttons)
+ *  - support (protocol 4-style) tilt (requires access to a > 1.4 rom model)
+ */
+
+/*
+ * Wacom serial protocol 4 documentation taken from linuxwacom-0.9.9 code,
+ * protocol 4 uses 7 or 9 byte of data in the following format:
+ *
+ *	Byte 1
+ *	bit 7  Sync bit always 1
+ *	bit 6  Pointing device detected
+ *	bit 5  Cursor = 0 / Stylus = 1
+ *	bit 4  Reserved
+ *	bit 3  1 if a button on the pointing device has been pressed
+ *	bit 2  P0 (optional)
+ *	bit 1  X15
+ *	bit 0  X14
+ *
+ *	Byte 2
+ *	bit 7  Always 0
+ *	bits 6-0 = X13 - X7
+ *
+ *	Byte 3
+ *	bit 7  Always 0
+ *	bits 6-0 = X6 - X0
+ *
+ *	Byte 4
+ *	bit 7  Always 0
+ *	bit 6  B3
+ *	bit 5  B2
+ *	bit 4  B1
+ *	bit 3  B0
+ *	bit 2  P1 (optional)
+ *	bit 1  Y15
+ *	bit 0  Y14
+ *
+ *	Byte 5
+ *	bit 7  Always 0
+ *	bits 6-0 = Y13 - Y7
+ *
+ *	Byte 6
+ *	bit 7  Always 0
+ *	bits 6-0 = Y6 - Y0
+ *
+ *	Byte 7
+ *	bit 7 Always 0
+ *	bit 6  Sign of pressure data; or wheel-rel for cursor tool
+ *	bit 5  P7; or REL1 for cursor tool
+ *	bit 4  P6; or REL0 for cursor tool
+ *	bit 3  P5
+ *	bit 2  P4
+ *	bit 1  P3
+ *	bit 0  P2
+ *
+ *	byte 8 and 9 are optional and present only
+ *	in tilt mode.
+ *
+ *	Byte 8
+ *	bit 7 Always 0
+ *	bit 6 Sign of tilt X
+ *	bit 5  Xt6
+ *	bit 4  Xt5
+ *	bit 3  Xt4
+ *	bit 2  Xt3
+ *	bit 1  Xt2
+ *	bit 0  Xt1
+ *
+ *	Byte 9
+ *	bit 7 Always 0
+ *	bit 6 Sign of tilt Y
+ *	bit 5  Yt6
+ *	bit 4  Yt5
+ *	bit 3  Yt4
+ *	bit 2  Yt3
+ *	bit 1  Yt2
+ *	bit 0  Yt1
+ */
+
+#include <linux/completion.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/serio.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "wacom_wac.h"
+
+MODULE_AUTHOR("Julian Squires <julian@cipht.net>, Hans de Goede <hdegoede@redhat.com>");
+MODULE_DESCRIPTION("Wacom protocol 4 serial tablet driver");
+MODULE_LICENSE("GPL");
+
+#define REQUEST_MODEL_AND_ROM_VERSION	"~#"
+#define REQUEST_MAX_COORDINATES		"~C\r"
+#define REQUEST_CONFIGURATION_STRING	"~R\r"
+#define REQUEST_RESET_TO_PROTOCOL_IV	"\r#"
+/*
+ * Note: sending "\r$\r" causes at least the Digitizer II to send
+ * packets in ASCII instead of binary.  "\r#" seems to undo that.
+ */
+
+#define COMMAND_START_SENDING_PACKETS		"ST\r"
+#define COMMAND_STOP_SENDING_PACKETS		"SP\r"
+#define COMMAND_MULTI_MODE_INPUT		"MU1\r"
+#define COMMAND_ORIGIN_IN_UPPER_LEFT		"OC1\r"
+#define COMMAND_ENABLE_ALL_MACRO_BUTTONS	"~M0\r"
+#define COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS	"~M1\r"
+#define COMMAND_TRANSMIT_AT_MAX_RATE		"IT0\r"
+#define COMMAND_DISABLE_INCREMENTAL_MODE	"IN0\r"
+#define COMMAND_ENABLE_CONTINUOUS_MODE		"SR\r"
+#define COMMAND_ENABLE_PRESSURE_MODE		"PH1\r"
+#define COMMAND_Z_FILTER			"ZF1\r"
+
+/* Note that this is a protocol 4 packet without tilt information. */
+#define PACKET_LENGTH		7
+#define DATA_SIZE		32
+
+/* flags */
+#define F_COVERS_SCREEN		0x01
+#define F_HAS_STYLUS2		0x02
+#define F_HAS_SCROLLWHEEL	0x04
+
+enum { STYLUS = 1, ERASER, CURSOR };
+
+static const struct {
+	int device_id;
+	int input_id;
+} tools[] = {
+	{ 0, 0 },
+	{ STYLUS_DEVICE_ID, BTN_TOOL_PEN },
+	{ ERASER_DEVICE_ID, BTN_TOOL_RUBBER },
+	{ CURSOR_DEVICE_ID, BTN_TOOL_MOUSE },
+};
+
+struct wacom {
+	struct input_dev *dev;
+	struct completion cmd_done;
+	int result;
+	u8 expect;
+	u8 eraser_mask;
+	unsigned int extra_z_bits;
+	unsigned int flags;
+	unsigned int res_x, res_y;
+	unsigned int max_x, max_y;
+	unsigned int tool;
+	unsigned int idx;
+	u8 data[DATA_SIZE];
+	char phys[32];
+};
+
+enum {
+	MODEL_CINTIQ		= 0x504C, /* PL */
+	MODEL_CINTIQ2		= 0x4454, /* DT */
+	MODEL_DIGITIZER_II	= 0x5544, /* UD */
+	MODEL_GRAPHIRE		= 0x4554, /* ET */
+	MODEL_PENPARTNER	= 0x4354, /* CT */
+};
+
+static void wacom_handle_model_response(struct wacom *wacom)
+{
+	int major_v, minor_v, r = 0;
+	char *p;
+
+	p = strrchr(wacom->data, 'V');
+	if (p)
+		r = sscanf(p + 1, "%u.%u", &major_v, &minor_v);
+	if (r != 2)
+		major_v = minor_v = 0;
+
+	switch (wacom->data[2] << 8 | wacom->data[3]) {
+	case MODEL_CINTIQ:	/* UNTESTED */
+	case MODEL_CINTIQ2:
+		if ((wacom->data[2] << 8 | wacom->data[3]) == MODEL_CINTIQ) {
+			wacom->dev->name = "Wacom Cintiq";
+			wacom->dev->id.version = MODEL_CINTIQ;
+		} else {
+			wacom->dev->name = "Wacom Cintiq II";
+			wacom->dev->id.version = MODEL_CINTIQ2;
+		}
+		wacom->res_x = 508;
+		wacom->res_y = 508;
+
+		switch (wacom->data[5] << 8 | wacom->data[6]) {
+		case 0x3731: /* PL-710 */
+			wacom->res_x = 2540;
+			wacom->res_y = 2540;
+			/* fall through */
+		case 0x3535: /* PL-550 */
+		case 0x3830: /* PL-800 */
+			wacom->extra_z_bits = 2;
+		}
+
+		wacom->flags = F_COVERS_SCREEN;
+		break;
+
+	case MODEL_PENPARTNER:
+		wacom->dev->name = "Wacom Penpartner";
+		wacom->dev->id.version = MODEL_PENPARTNER;
+		wacom->res_x = 1000;
+		wacom->res_y = 1000;
+		break;
+
+	case MODEL_GRAPHIRE:
+		wacom->dev->name = "Wacom Graphire";
+		wacom->dev->id.version = MODEL_GRAPHIRE;
+		wacom->res_x = 1016;
+		wacom->res_y = 1016;
+		wacom->max_x = 5103;
+		wacom->max_y = 3711;
+		wacom->extra_z_bits = 2;
+		wacom->eraser_mask = 0x08;
+		wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL;
+		break;
+
+	case MODEL_DIGITIZER_II:
+		wacom->dev->name = "Wacom Digitizer II";
+		wacom->dev->id.version = MODEL_DIGITIZER_II;
+		if (major_v == 1 && minor_v <= 2)
+			wacom->extra_z_bits = 0; /* UNTESTED */
+		break;
+
+	default:
+		dev_err(&wacom->dev->dev, "Unsupported Wacom model %s\n",
+			wacom->data);
+		wacom->result = -ENODEV;
+		return;
+	}
+
+	dev_info(&wacom->dev->dev, "%s tablet, version %u.%u\n",
+		 wacom->dev->name, major_v, minor_v);
+}
+
+static void wacom_handle_configuration_response(struct wacom *wacom)
+{
+	int r, skip;
+
+	dev_dbg(&wacom->dev->dev, "Configuration string: %s\n", wacom->data);
+	r = sscanf(wacom->data, "~R%x,%u,%u,%u,%u", &skip, &skip, &skip,
+		   &wacom->res_x, &wacom->res_y);
+	if (r != 5)
+		dev_warn(&wacom->dev->dev, "could not get resolution\n");
+}
+
+static void wacom_handle_coordinates_response(struct wacom *wacom)
+{
+	int r;
+
+	dev_dbg(&wacom->dev->dev, "Coordinates string: %s\n", wacom->data);
+	r = sscanf(wacom->data, "~C%u,%u", &wacom->max_x, &wacom->max_y);
+	if (r != 2)
+		dev_warn(&wacom->dev->dev, "could not get max coordinates\n");
+}
+
+static void wacom_handle_response(struct wacom *wacom)
+{
+	if (wacom->data[0] != '~' || wacom->data[1] != wacom->expect) {
+		dev_err(&wacom->dev->dev,
+			"Wacom got an unexpected response: %s\n", wacom->data);
+		wacom->result = -EIO;
+	} else {
+		wacom->result = 0;
+
+		switch (wacom->data[1]) {
+		case '#':
+			wacom_handle_model_response(wacom);
+			break;
+		case 'R':
+			wacom_handle_configuration_response(wacom);
+			break;
+		case 'C':
+			wacom_handle_coordinates_response(wacom);
+			break;
+		}
+	}
+
+	complete(&wacom->cmd_done);
+}
+
+static void wacom_handle_packet(struct wacom *wacom)
+{
+	u8 in_proximity_p, stylus_p, button;
+	unsigned int tool;
+	int x, y, z;
+
+	in_proximity_p = wacom->data[0] & 0x40;
+	stylus_p = wacom->data[0] & 0x20;
+	button = (wacom->data[3] & 0x78) >> 3;
+	x = (wacom->data[0] & 3) << 14 | wacom->data[1]<<7 | wacom->data[2];
+	y = (wacom->data[3] & 3) << 14 | wacom->data[4]<<7 | wacom->data[5];
+
+	if (in_proximity_p && stylus_p) {
+		z = wacom->data[6] & 0x7f;
+		if (wacom->extra_z_bits >= 1)
+			z = z << 1 | (wacom->data[3] & 0x4) >> 2;
+		if (wacom->extra_z_bits > 1)
+			z = z << 1 | (wacom->data[0] & 0x4) >> 2;
+		z = z ^ (0x40 << wacom->extra_z_bits);
+	} else {
+		z = -1;
+	}
+
+	if (stylus_p)
+		tool = (button & wacom->eraser_mask) ? ERASER : STYLUS;
+	else
+		tool = CURSOR;
+
+	if (tool != wacom->tool && wacom->tool != 0) {
+		input_report_key(wacom->dev, tools[wacom->tool].input_id, 0);
+		input_sync(wacom->dev);
+	}
+	wacom->tool = tool;
+
+	input_report_key(wacom->dev, tools[tool].input_id, in_proximity_p);
+	input_report_abs(wacom->dev, ABS_MISC,
+			 in_proximity_p ? tools[tool].device_id : 0);
+	input_report_abs(wacom->dev, ABS_X, x);
+	input_report_abs(wacom->dev, ABS_Y, y);
+	input_report_abs(wacom->dev, ABS_PRESSURE, z);
+	if (stylus_p) {
+		input_report_key(wacom->dev, BTN_TOUCH, button & 1);
+		input_report_key(wacom->dev, BTN_STYLUS, button & 2);
+		input_report_key(wacom->dev, BTN_STYLUS2, button & 4);
+	} else {
+		input_report_key(wacom->dev, BTN_LEFT, button & 1);
+		input_report_key(wacom->dev, BTN_RIGHT, button & 2);
+		input_report_key(wacom->dev, BTN_MIDDLE, button & 4);
+		/* handle relative wheel for non-stylus device */
+		z = (wacom->data[6] & 0x30) >> 4;
+		if (wacom->data[6] & 0x40)
+			z = -z;
+		input_report_rel(wacom->dev, REL_WHEEL, z);
+	}
+	input_sync(wacom->dev);
+}
+
+static void wacom_clear_data_buf(struct wacom *wacom)
+{
+	memset(wacom->data, 0, DATA_SIZE);
+	wacom->idx = 0;
+}
+
+static irqreturn_t wacom_interrupt(struct serio *serio, unsigned char data,
+				   unsigned int flags)
+{
+	struct wacom *wacom = serio_get_drvdata(serio);
+
+	if (data & 0x80)
+		wacom->idx = 0;
+
+	/*
+	 * We're either expecting a carriage return-terminated ASCII
+	 * response string, or a seven-byte packet with the MSB set on
+	 * the first byte.
+	 *
+	 * Note however that some tablets (the PenPartner, for
+	 * example) don't send a carriage return at the end of a
+	 * command.  We handle these by waiting for timeout.
+	 */
+	if (data == '\r' && !(wacom->data[0] & 0x80)) {
+		wacom_handle_response(wacom);
+		wacom_clear_data_buf(wacom);
+		return IRQ_HANDLED;
+	}
+
+	/* Leave place for 0 termination */
+	if (wacom->idx > (DATA_SIZE - 2)) {
+		dev_dbg(&wacom->dev->dev,
+			"throwing away %d bytes of garbage\n", wacom->idx);
+		wacom_clear_data_buf(wacom);
+	}
+	wacom->data[wacom->idx++] = data;
+
+	if (wacom->idx == PACKET_LENGTH && (wacom->data[0] & 0x80)) {
+		wacom_handle_packet(wacom);
+		wacom_clear_data_buf(wacom);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void wacom_disconnect(struct serio *serio)
+{
+	struct wacom *wacom = serio_get_drvdata(serio);
+
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_unregister_device(wacom->dev);
+	kfree(wacom);
+}
+
+static int wacom_send(struct serio *serio, const u8 *command)
+{
+	int err = 0;
+
+	for (; !err && *command; command++)
+		err = serio_write(serio, *command);
+
+	return err;
+}
+
+static int wacom_send_setup_string(struct wacom *wacom, struct serio *serio)
+{
+	const u8 *cmd;
+
+	switch (wacom->dev->id.version) {
+	case MODEL_CINTIQ:	/* UNTESTED */
+		cmd = COMMAND_ORIGIN_IN_UPPER_LEFT
+			COMMAND_TRANSMIT_AT_MAX_RATE
+			COMMAND_ENABLE_CONTINUOUS_MODE
+			COMMAND_START_SENDING_PACKETS;
+		break;
+
+	case MODEL_PENPARTNER:
+		cmd = COMMAND_ENABLE_PRESSURE_MODE
+			COMMAND_START_SENDING_PACKETS;
+		break;
+
+	default:
+		cmd = COMMAND_MULTI_MODE_INPUT
+			COMMAND_ORIGIN_IN_UPPER_LEFT
+			COMMAND_ENABLE_ALL_MACRO_BUTTONS
+			COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS
+			COMMAND_TRANSMIT_AT_MAX_RATE
+			COMMAND_DISABLE_INCREMENTAL_MODE
+			COMMAND_ENABLE_CONTINUOUS_MODE
+			COMMAND_Z_FILTER
+			COMMAND_START_SENDING_PACKETS;
+		break;
+	}
+
+	return wacom_send(serio, cmd);
+}
+
+static int wacom_send_and_wait(struct wacom *wacom, struct serio *serio,
+			       const u8 *cmd, const char *desc)
+{
+	int err;
+	unsigned long u;
+
+	wacom->expect = cmd[1];
+	init_completion(&wacom->cmd_done);
+
+	err = wacom_send(serio, cmd);
+	if (err)
+		return err;
+
+	u = wait_for_completion_timeout(&wacom->cmd_done, HZ);
+	if (u == 0) {
+		/* Timeout, process what we've received. */
+		wacom_handle_response(wacom);
+	}
+
+	wacom->expect = 0;
+	return wacom->result;
+}
+
+static int wacom_setup(struct wacom *wacom, struct serio *serio)
+{
+	int err;
+
+	/* Note that setting the link speed is the job of inputattach.
+	 * We assume that reset negotiation has already happened,
+	 * here. */
+	err = wacom_send_and_wait(wacom, serio, REQUEST_MODEL_AND_ROM_VERSION,
+				  "model and version");
+	if (err)
+		return err;
+
+	if (!(wacom->res_x && wacom->res_y)) {
+		err = wacom_send_and_wait(wacom, serio,
+					  REQUEST_CONFIGURATION_STRING,
+					  "configuration string");
+		if (err)
+			return err;
+	}
+
+	if (!(wacom->max_x && wacom->max_y)) {
+		err = wacom_send_and_wait(wacom, serio,
+					  REQUEST_MAX_COORDINATES,
+					  "coordinates string");
+		if (err)
+			return err;
+	}
+
+	return wacom_send_setup_string(wacom, serio);
+}
+
+static int wacom_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct wacom *wacom;
+	struct input_dev *input_dev;
+	int err = -ENOMEM;
+
+	wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!wacom || !input_dev)
+		goto free_device;
+
+	wacom->dev = input_dev;
+	wacom->extra_z_bits = 1;
+	wacom->eraser_mask = 0x04;
+	wacom->tool = wacom->idx = 0;
+	snprintf(wacom->phys, sizeof(wacom->phys), "%s/input0", serio->phys);
+	input_dev->phys = wacom->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor  = SERIO_WACOM_IV;
+	input_dev->id.product = serio->id.extra;
+	input_dev->dev.parent = &serio->dev;
+
+	input_dev->evbit[0] =
+		BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) | BIT_MASK(EV_REL);
+	set_bit(ABS_MISC, input_dev->absbit);
+	set_bit(BTN_TOOL_PEN, input_dev->keybit);
+	set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
+	set_bit(BTN_TOOL_MOUSE, input_dev->keybit);
+	set_bit(BTN_TOUCH, input_dev->keybit);
+	set_bit(BTN_STYLUS, input_dev->keybit);
+	set_bit(BTN_LEFT, input_dev->keybit);
+	set_bit(BTN_RIGHT, input_dev->keybit);
+	set_bit(BTN_MIDDLE, input_dev->keybit);
+
+	serio_set_drvdata(serio, wacom);
+
+	err = serio_open(serio, drv);
+	if (err)
+		goto free_device;
+
+	err = wacom_setup(wacom, serio);
+	if (err)
+		goto close_serio;
+
+	set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+	if (!(wacom->flags & F_COVERS_SCREEN))
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
+	if (wacom->flags & F_HAS_STYLUS2)
+		__set_bit(BTN_STYLUS2, input_dev->keybit);
+
+	if (wacom->flags & F_HAS_SCROLLWHEEL)
+		__set_bit(REL_WHEEL, input_dev->relbit);
+
+	input_abs_set_res(wacom->dev, ABS_X, wacom->res_x);
+	input_abs_set_res(wacom->dev, ABS_Y, wacom->res_y);
+	input_set_abs_params(wacom->dev, ABS_X, 0, wacom->max_x, 0, 0);
+	input_set_abs_params(wacom->dev, ABS_Y, 0, wacom->max_y, 0, 0);
+	input_set_abs_params(wacom->dev, ABS_PRESSURE, -1,
+			     (1 << (7 + wacom->extra_z_bits)) - 1, 0, 0);
+
+	err = input_register_device(wacom->dev);
+	if (err)
+		goto close_serio;
+
+	return 0;
+
+close_serio:
+	serio_close(serio);
+free_device:
+	serio_set_drvdata(serio, NULL);
+	input_free_device(input_dev);
+	kfree(wacom);
+	return err;
+}
+
+static struct serio_device_id wacom_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_WACOM_IV,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, wacom_serio_ids);
+
+static struct serio_driver wacom_drv = {
+	.driver		= {
+		.name	= "wacom_serial4",
+	},
+	.description	= "Wacom protocol 4 serial tablet driver",
+	.id_table	= wacom_serio_ids,
+	.interrupt	= wacom_interrupt,
+	.connect	= wacom_connect,
+	.disconnect	= wacom_disconnect,
+};
+
+module_serio_driver(wacom_drv);
diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h
index 9f53fa7fc132..becdd78295cc 100644
--- a/include/uapi/linux/serio.h
+++ b/include/uapi/linux/serio.h
@@ -76,5 +76,6 @@
 #define SERIO_HAMPSHIRE	0x3b
 #define SERIO_PS2MULT	0x3c
 #define SERIO_TSC40	0x3d
+#define SERIO_WACOM_IV	0x3e
 
 #endif /* _UAPI_SERIO_H */
-- 
cgit v1.2.3


From ba4e9a61ad54c438d4c7b655e94e31f23a6fe13f Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Sun, 20 Jul 2014 17:27:09 -0700
Subject: Input: uinput - add UI_GET_VERSION ioctl

This ioctl is the counterpart to EVIOCGVERSION and returns the
uinput-version the kernel was compiled with.

Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 6 ++++++
 include/uapi/linux/uinput.h | 9 +++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 883f045f37df..421e29e4cd81 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -723,6 +723,12 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 	}
 
 	switch (cmd) {
+		case UI_GET_VERSION:
+			if (put_user(UINPUT_VERSION,
+				     (unsigned int __user *)p))
+				retval = -EFAULT;
+			goto out;
+
 		case UI_DEV_CREATE:
 			retval = uinput_create_device(udev);
 			goto out;
diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h
index 0389b489bbba..baeab83deb64 100644
--- a/include/uapi/linux/uinput.h
+++ b/include/uapi/linux/uinput.h
@@ -84,6 +84,15 @@ struct uinput_ff_erase {
  */
 #define UI_GET_SYSNAME(len)	_IOC(_IOC_READ, UINPUT_IOCTL_BASE, 300, len)
 
+/**
+ * UI_GET_VERSION - Return version of uinput protocol
+ *
+ * This writes uinput protocol version implemented by the kernel into
+ * the integer pointed to by the ioctl argument. The protocol version
+ * is hard-coded in the kernel and is independent of the uinput device.
+ */
+#define UI_GET_VERSION		_IOR(UINPUT_IOCTL_BASE, 301, unsigned int)
+
 /*
  * To write a force-feedback-capable driver, the upload_effect
  * and erase_effect callbacks in input_dev must be implemented.
-- 
cgit v1.2.3


From ff587e45a1a1690f5cd713a2782672c579460365 Mon Sep 17 00:00:00 2001
From: Vandana Kannan <vandana.kannan@intel.com>
Date: Wed, 11 Jun 2014 10:46:48 +0530
Subject: drm/crtc: Add property for aspect ratio

Added a property to enable user space to set aspect ratio.
This patch contains declaration of the property and code to create the
property.

v2: Thierry's review comments.
	- Made aspect ratio enum generic instead of HDMI/CEA specfic
	- Removed usage of temporary aspect_ratio variable

v3: Thierry's review comments.
	- Fixed indentation

v4: Thierry's review comments.
	- Return ENOMEM when property creation fails

Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Acked-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_crtc.c  | 33 +++++++++++++++++++++++++++++++++
 include/drm/drm_crtc.h      |  2 ++
 include/uapi/drm/drm_mode.h |  5 +++++
 3 files changed, 40 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 89bab66558ef..f0a777747907 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -182,6 +182,12 @@ static const struct drm_prop_enum_list drm_scaling_mode_enum_list[] =
 	{ DRM_MODE_SCALE_ASPECT, "Full aspect" },
 };
 
+static const struct drm_prop_enum_list drm_aspect_ratio_enum_list[] = {
+	{ DRM_MODE_PICTURE_ASPECT_NONE, "Automatic" },
+	{ DRM_MODE_PICTURE_ASPECT_4_3, "4:3" },
+	{ DRM_MODE_PICTURE_ASPECT_16_9, "16:9" },
+};
+
 /*
  * Non-global properties, but "required" for certain connectors.
  */
@@ -1390,6 +1396,33 @@ int drm_mode_create_scaling_mode_property(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_mode_create_scaling_mode_property);
 
+/**
+ * drm_mode_create_aspect_ratio_property - create aspect ratio property
+ * @dev: DRM device
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * connectors.
+ *
+ * Returns:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_create_aspect_ratio_property(struct drm_device *dev)
+{
+	if (dev->mode_config.aspect_ratio_property)
+		return 0;
+
+	dev->mode_config.aspect_ratio_property =
+		drm_property_create_enum(dev, 0, "aspect ratio",
+				drm_aspect_ratio_enum_list,
+				ARRAY_SIZE(drm_aspect_ratio_enum_list));
+
+	if (dev->mode_config.aspect_ratio_property == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_aspect_ratio_property);
+
 /**
  * drm_mode_create_dirty_property - create dirty property
  * @dev: DRM device
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 08ed55e02762..be7114e76d1b 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -835,6 +835,7 @@ struct drm_mode_config {
 
 	/* Optional properties */
 	struct drm_property *scaling_mode_property;
+	struct drm_property *aspect_ratio_property;
 	struct drm_property *dirty_info_property;
 
 	/* dumb ioctl parameters */
@@ -1023,6 +1024,7 @@ extern int drm_mode_create_dvi_i_properties(struct drm_device *dev);
 extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats,
 				     char *formats[]);
 extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
+extern int drm_mode_create_aspect_ratio_property(struct drm_device *dev);
 extern int drm_mode_create_dirty_info_property(struct drm_device *dev);
 
 extern int drm_mode_connector_attach_encoder(struct drm_connector *connector,
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index def54f9e07ca..a0db2d4aa5f0 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -88,6 +88,11 @@
 #define DRM_MODE_SCALE_CENTER		2 /* Centered, no scaling */
 #define DRM_MODE_SCALE_ASPECT		3 /* Full screen, preserve aspect */
 
+/* Picture aspect ratio options */
+#define DRM_MODE_PICTURE_ASPECT_NONE	0
+#define DRM_MODE_PICTURE_ASPECT_4_3	1
+#define DRM_MODE_PICTURE_ASPECT_16_9	2
+
 /* Dithering mode options */
 #define DRM_MODE_DITHERING_OFF	0
 #define DRM_MODE_DITHERING_ON	1
-- 
cgit v1.2.3


From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 2 Jun 2014 11:02:59 +1000
Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling

This provides a way for userspace controls which sPAPR hcalls get
handled in the kernel.  Each hcall can be individually enabled or
disabled for in-kernel handling, except for H_RTAS.  The exception
for H_RTAS is because userspace can already control whether
individual RTAS functions are handled in-kernel or not via the
KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for
H_RTAS is out of the normal sequence of hcall numbers.

Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the
KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM.
The args field of the struct kvm_enable_cap specifies the hcall number
in args[0] and the enable/disable flag in args[1]; 0 means disable
in-kernel handling (so that the hcall will always cause an exit to
userspace) and 1 means enable.  Enabling or disabling in-kernel
handling of an hcall is effective across the whole VM.

The ability for KVM_ENABLE_CAP to be used on a VM file descriptor
on PowerPC is new, added by this commit.  The KVM_CAP_ENABLE_CAP_VM
capability advertises that this ability exists.

When a VM is created, an initial set of hcalls are enabled for
in-kernel handling.  The set that is enabled is the set that have
an in-kernel implementation at this point.  Any new hcall
implementations from this point onwards should not be added to the
default set without a good reason.

No distinction is made between real-mode and virtual-mode hcall
implementations; the one setting controls them both.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       | 41 ++++++++++++++++++++++++--
 arch/powerpc/include/asm/kvm_book3s.h   |  1 +
 arch/powerpc/include/asm/kvm_host.h     |  2 ++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 51 +++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 +++++++
 arch/powerpc/kvm/book3s_pr.c            |  5 ++++
 arch/powerpc/kvm/book3s_pr_papr.c       | 37 ++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c              | 45 +++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h                |  1 +
 10 files changed, 193 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0fe36497642c..5c54d196f4c8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows:
          this function/index combination
 
 
-6. Capabilities that can be enabled
------------------------------------
+6. Capabilities that can be enabled on vCPUs
+--------------------------------------------
 
 There are certain capabilities that change the behavior of the virtual CPU when
 enabled. To enable them, please see section 4.37. Below you can find a list of
@@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd
             args[1] is the XICS CPU number (server ID) for this vcpu
 
 This capability connects the vcpu to an in-kernel XICS device.
+
+
+7. Capabilities that can be enabled on VMs
+------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual
+machine when enabled. To enable them, please see section 4.37. Below
+you can find a list of capabilities and what their effect on the VM
+is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+7.1 KVM_CAP_PPC_ENABLE_HCALL
+
+Architectures: ppc
+Parameters: args[0] is the sPAPR hcall number
+	    args[1] is 0 to disable, 1 to enable in-kernel handling
+
+This capability controls whether individual sPAPR hypercalls (hcalls)
+get handled by the kernel or not.  Enabling or disabling in-kernel
+handling of an hcall is effective across the VM.  On creation, an
+initial set of hcalls are enabled for in-kernel handling, which
+consists of those hcalls for which in-kernel handlers were implemented
+before this capability was implemented.  If disabled, the kernel will
+not to attempt to handle the hcall, but will always exit to userspace
+to handle it.  Note that it may not make sense to enable some and
+disable others of a group of related hcalls, but KVM does not prevent
+userspace from doing that.
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a20cc0bbd048..052ab2ad49b5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
+extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
 extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 				 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f9ae69682ce1..62b2cee450a5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/hvcall.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -263,6 +264,7 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
+	DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #endif
 #ifdef CONFIG_KVM_MPIC
 	struct openpic *mpic;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f5995a912213..17ffcb4f27f9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -493,6 +493,7 @@ int main(void)
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
 	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
+	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
 	DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1562acfa05bf..cf445d22570f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -67,6 +67,8 @@
 /* Used as a "null" value for timebase values */
 #define TB_NIL	(~(u64)0)
 
+static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
+	if (req <= MAX_HCALL_OPCODE &&
+	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
+		return RESUME_HOST;
+
 	switch (req) {
 	case H_ENTER:
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 */
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
+	/* Start out with the default set of hcalls enabled */
+	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
+	       sizeof(kvm->arch.enabled_hcalls));
+
 	kvm->arch.rma = NULL;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 	return r;
 }
 
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_REMOVE,
+	H_ENTER,
+	H_READ,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_GET_TCE,
+	H_PUT_TCE,
+	H_SET_DABR,
+	H_SET_XDABR,
+	H_CEDE,
+	H_PROD,
+	H_CONFER,
+	H_REGISTER_VPA,
+#ifdef CONFIG_KVM_XICS
+	H_EOI,
+	H_CPPR,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR,
+	H_XIRR_X,
+#endif
+	0
+};
+
+static void init_default_hcalls(void)
+{
+	int i;
+
+	for (i = 0; default_hcall_list[i]; ++i)
+		__set_bit(default_hcall_list[i] / 4, default_enabled_hcalls);
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void)
 	kvm_ops_hv.owner = THIS_MODULE;
 	kvmppc_hv_ops = &kvm_ops_hv;
 
+	init_default_hcalls();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 64ac56f6c3fe..33aaadef7139 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1909,6 +1909,17 @@ hcall_try_real_mode:
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
+	/* See if this hcall is enabled for in-kernel handling */
+	ld	r4, VCPU_KVM(r9)
+	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
+	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
+	add	r4, r4, r0
+	ld	r0, KVM_ENABLED_HCALLS(r4)
+	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
+	srd	r0, r0, r4
+	andi.	r0, r0, 1
+	beq	guest_exit_cont
+	/* Get pointer to handler, if any, and call it */
 	LOAD_REG_ADDR(r4, hcall_real_table)
 	lwax	r3,r3,r4
 	cmpwi	r3,0
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3b82e8616dfa..123ac7dc5e1f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hpt_mutex);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Start out with the default set of hcalls enabled */
+	kvmppc_pr_init_default_hcalls(kvm);
+#endif
+
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
 		if (++kvm_global_user_count == 1)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f7c25c625a5b..eacaa6e4876e 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
+	if (cmd <= MAX_HCALL_OPCODE &&
+	    !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls))
+		return EMULATE_FAIL;
+
 	switch (cmd) {
 	case H_ENTER:
 		return kvmppc_h_pr_enter(vcpu);
@@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 
 	return EMULATE_FAIL;
 }
+
+
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_ENTER,
+	H_REMOVE,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_PUT_TCE,
+	H_CEDE,
+#ifdef CONFIG_KVM_XICS
+	H_XIRR,
+	H_CPPR,
+	H_EOI,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR_X,
+#endif
+	0
+};
+
+void kvmppc_pr_init_default_hcalls(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; default_hcall_list[i]; ++i)
+		__set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls);
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 61c738ab1283..3222a4d08a6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
@@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_RTAS:
 	case KVM_CAP_PPC_FIXUP_HCALL:
+	case KVM_CAP_PPC_ENABLE_HCALL:
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS:
 #endif
@@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }
 
+
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+				   struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	case KVM_CAP_PPC_ENABLE_HCALL: {
+		unsigned long hcall = cap->args[0];
+
+		r = -EINVAL;
+		if (hcall > MAX_HCALL_OPCODE || (hcall & 3) ||
+		    cap->args[1] > 1)
+			break;
+		if (cap->args[1])
+			set_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		else
+			clear_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		r = 0;
+		break;
+	}
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
@@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		break;
 	}
+	case KVM_ENABLE_CAP:
+	{
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e11d8f170a62..0418b746cb68 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_VM_ATTRIBUTES 101
 #define KVM_CAP_ARM_PSCI_0_2 102
 #define KVM_CAP_PPC_FIXUP_HCALL 103
+#define KVM_CAP_PPC_ENABLE_HCALL 104
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:33:08 +0200
Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd

The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately
on PPC some of the capabilities change depending on the way a VM was created.

So instead we need a way to expose capabilities as VM ioctl, so that we can
see which VM type we're using (HV or PR). To enable this, add the
KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  7 +++--
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/kvm_main.c               | 58 +++++++++++++++++++++------------------
 3 files changed, 37 insertions(+), 29 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 884f819e1908..8898caf8c090 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
 4.4 KVM_CHECK_EXTENSION
 
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
 Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
 Parameters: extension identifier (KVM_CAP_*)
 Returns: 0 if unsupported; 1 (or some other positive integer) if supported
 
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
 Generally 0 means no and 1 means yes, but some extensions may report
 additional information in the integer return value.
 
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
 
 4.5 KVM_GET_VCPU_MMAP_SIZE
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0418b746cb68..51776cac6a9b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_PSCI_0_2 102
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
+#define KVM_CAP_CHECK_EXTENSION_VM 105
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e28f3caa539d..1b95cc926cfc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	return 0;
 }
 
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+	case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+	case KVM_CAP_CHECK_EXTENSION_VM:
+		return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQ_ROUTING:
+		return KVM_MAX_IRQ_ROUTES;
+#endif
+	default:
+		break;
+	}
+	return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_CHECK_EXTENSION:
+		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
-	case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
-	case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
-	case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQFD_RESAMPLE:
-#endif
-		return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQ_ROUTING:
-		return KVM_MAX_IRQ_ROUTES;
-#endif
-	default:
-		break;
-	}
-	return kvm_vm_ioctl_check_extension(kvm, arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
-- 
cgit v1.2.3


From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 28 Jul 2014 19:29:13 +0200
Subject: KVM: PPC: Remove DCR handling

DCR handling was only needed for 440 KVM. Since we removed it, we can also
remove handling of DCR accesses.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   |  6 +++---
 arch/powerpc/include/asm/kvm_host.h |  4 ----
 arch/powerpc/include/asm/kvm_ppc.h  |  1 -
 arch/powerpc/kvm/booke.c            |  5 -----
 arch/powerpc/kvm/powerpc.c          | 10 ----------
 arch/powerpc/kvm/timing.c           |  1 -
 arch/powerpc/kvm/timing.h           |  3 ---
 include/uapi/linux/kvm.h            |  4 ++--
 8 files changed, 5 insertions(+), 29 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 8898caf8c090..a21ff2265c21 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would
 appear if the VCPU performed a load or store of the appropriate width directly
 to the byte array.
 
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR,
-      KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI KVM_EXIT_PAPR and
+      KVM_EXIT_EPR the corresponding
 operations are complete (and guest state is consistent) only after userspace
 has re-entered the kernel with KVM_RUN.  The kernel side will first finish
 incomplete operations and then check for pending signals.  Userspace
@@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation
 			__u8  is_write;
 		} dcr;
 
-powerpc specific.
+Deprecated - was used for 440 KVM.
 
 		/* KVM_EXIT_OSI */
 		struct {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 66f5b5914e6c..98d9dd50d063 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -94,7 +94,6 @@ struct kvm_vm_stat {
 struct kvm_vcpu_stat {
 	u32 sum_exits;
 	u32 mmio_exits;
-	u32 dcr_exits;
 	u32 signal_exits;
 	u32 light_exits;
 	/* Account for special types of light exits: */
@@ -126,7 +125,6 @@ struct kvm_vcpu_stat {
 
 enum kvm_exit_types {
 	MMIO_EXITS,
-	DCR_EXITS,
 	SIGNAL_EXITS,
 	ITLB_REAL_MISS_EXITS,
 	ITLB_VIRT_MISS_EXITS,
@@ -601,8 +599,6 @@ struct kvm_vcpu_arch {
 	u8 io_gpr; /* GPR used as IO source/target */
 	u8 mmio_is_bigendian;
 	u8 mmio_sign_extend;
-	u8 dcr_needed;
-	u8 dcr_is_write;
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index cbee4538307f..8e36c1e2c631 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -41,7 +41,6 @@
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
-	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
 	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index f30948a17c03..b4c89fa6f109 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmio",       VCPU_STAT(mmio_exits) },
-	{ "dcr",        VCPU_STAT(dcr_exits) },
 	{ "sig",        VCPU_STAT(signal_exits) },
 	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
 	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
@@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	case EMULATE_AGAIN:
 		return RESUME_GUEST;
 
-	case EMULATE_DO_DCR:
-		run->exit_reason = KVM_EXIT_DCR;
-		return RESUME_HOST;
-
 	case EMULATE_FAIL:
 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c14ed15fd60b..288b4bb05cbd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
-                                     struct kvm_run *run)
-{
-	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
-}
-
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
@@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (!vcpu->mmio_is_write)
 			kvmppc_complete_mmio_load(vcpu, run);
 		vcpu->mmio_needed = 0;
-	} else if (vcpu->arch.dcr_needed) {
-		if (!vcpu->arch.dcr_is_write)
-			kvmppc_complete_dcr_load(vcpu, run);
-		vcpu->arch.dcr_needed = 0;
 	} else if (vcpu->arch.osi_needed) {
 		u64 *gprs = run->osi.gprs;
 		int i;
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 07b6110a4bb7..e44d2b2ea97e 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
 
 static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
 	[MMIO_EXITS] =              "MMIO",
-	[DCR_EXITS] =               "DCR",
 	[SIGNAL_EXITS] =            "SIGNAL",
 	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
 	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index bf191e72b2d8..3123690c82dc 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 	case EMULATED_INST_EXITS:
 		vcpu->stat.emulated_inst_exits++;
 		break;
-	case DCR_EXITS:
-		vcpu->stat.dcr_exits++;
-		break;
 	case DSI_EXITS:
 		vcpu->stat.dsi_exits++;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 51776cac6a9b..f6f24aeb9e1a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -162,7 +162,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_TPR_ACCESS       12
 #define KVM_EXIT_S390_SIEIC       13
 #define KVM_EXIT_S390_RESET       14
-#define KVM_EXIT_DCR              15
+#define KVM_EXIT_DCR              15 /* deprecated */
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
@@ -268,7 +268,7 @@ struct kvm_run {
 			__u64 trans_exc_code;
 			__u32 pgm_code;
 		} s390_ucontrol;
-		/* KVM_EXIT_DCR */
+		/* KVM_EXIT_DCR (deprecated) */
 		struct {
 			__u32 dcrn;
 			__u32 data;
-- 
cgit v1.2.3


From 7e6edb9b2e0bcfb2a588db390c44d120213c57ae Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 31 Jul 2014 11:01:28 +0300
Subject: IB/core: Add user MR re-registration support

Memory re-registration is a feature that enables changing the
attributes of a memory region registered by user-space, including PD,
translation (address and length) and access flags.

Add the required support in uverbs and the kernel verbs API.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/uverbs.h      |  1 +
 drivers/infiniband/core/uverbs_cmd.c  | 93 +++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/uverbs_main.c |  1 +
 include/rdma/ib_verbs.h               | 10 +++-
 include/uapi/rdma/ib_user_verbs.h     | 16 ++++++
 5 files changed, 120 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a09..643c08a025a5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
 IB_UVERBS_DECLARE_CMD(alloc_pd);
 IB_UVERBS_DECLARE_CMD(dealloc_pd);
 IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
 IB_UVERBS_DECLARE_CMD(alloc_mw);
 IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bcc..0600c50e6215 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
 	return ret;
 }
 
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+			   const char __user *buf, int in_len,
+			   int out_len)
+{
+	struct ib_uverbs_rereg_mr      cmd;
+	struct ib_uverbs_rereg_mr_resp resp;
+	struct ib_udata              udata;
+	struct ib_pd                *pd = NULL;
+	struct ib_mr                *mr;
+	struct ib_pd		    *old_pd;
+	int                          ret;
+	struct ib_uobject	    *uobj;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof(cmd)))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof(cmd),
+		   (unsigned long) cmd.response + sizeof(resp),
+		   in_len - sizeof(cmd), out_len - sizeof(resp));
+
+	if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+		return -EINVAL;
+
+	if ((cmd.flags & IB_MR_REREG_TRANS) &&
+	    (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+			return -EINVAL;
+
+	uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+			      file->ucontext);
+
+	if (!uobj)
+		return -EINVAL;
+
+	mr = uobj->object;
+
+	if (cmd.flags & IB_MR_REREG_ACCESS) {
+		ret = ib_check_mr_access(cmd.access_flags);
+		if (ret)
+			goto put_uobjs;
+	}
+
+	if (cmd.flags & IB_MR_REREG_PD) {
+		pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+		if (!pd) {
+			ret = -EINVAL;
+			goto put_uobjs;
+		}
+	}
+
+	if (atomic_read(&mr->usecnt)) {
+		ret = -EBUSY;
+		goto put_uobj_pd;
+	}
+
+	old_pd = mr->pd;
+	ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+					cmd.length, cmd.hca_va,
+					cmd.access_flags, pd, &udata);
+	if (!ret) {
+		if (cmd.flags & IB_MR_REREG_PD) {
+			atomic_inc(&pd->usecnt);
+			mr->pd = pd;
+			atomic_dec(&old_pd->usecnt);
+		}
+	} else {
+		goto put_uobj_pd;
+	}
+
+	memset(&resp, 0, sizeof(resp));
+	resp.lkey      = mr->lkey;
+	resp.rkey      = mr->rkey;
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp)))
+		ret = -EFAULT;
+	else
+		ret = in_len;
+
+put_uobj_pd:
+	if (cmd.flags & IB_MR_REREG_PD)
+		put_pd_read(pd);
+
+put_uobjs:
+
+	put_uobj_write(mr->uobject);
+
+	return ret;
+}
+
 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 			   const char __user *buf, int in_len,
 			   int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338b..c73b22a257fe 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
 	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
 	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
+	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
 	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
 	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7ccef342f724..ed44cc07a7b3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1097,7 +1097,8 @@ struct ib_mr_attr {
 enum ib_mr_rereg_flags {
 	IB_MR_REREG_TRANS	= 1,
 	IB_MR_REREG_PD		= (1<<1),
-	IB_MR_REREG_ACCESS	= (1<<2)
+	IB_MR_REREG_ACCESS	= (1<<2),
+	IB_MR_REREG_SUPPORTED	= ((IB_MR_REREG_ACCESS << 1) - 1)
 };
 
 /**
@@ -1547,6 +1548,13 @@ struct ib_device {
 						  u64 virt_addr,
 						  int mr_access_flags,
 						  struct ib_udata *udata);
+	int			   (*rereg_user_mr)(struct ib_mr *mr,
+						    int flags,
+						    u64 start, u64 length,
+						    u64 virt_addr,
+						    int mr_access_flags,
+						    struct ib_pd *pd,
+						    struct ib_udata *udata);
 	int                        (*query_mr)(struct ib_mr *mr,
 					       struct ib_mr_attr *mr_attr);
 	int                        (*dereg_mr)(struct ib_mr *mr);
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index cbfdd4ca9510..26daf55ff76e 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -276,6 +276,22 @@ struct ib_uverbs_reg_mr_resp {
 	__u32 rkey;
 };
 
+struct ib_uverbs_rereg_mr {
+	__u64 response;
+	__u32 mr_handle;
+	__u32 flags;
+	__u64 start;
+	__u64 length;
+	__u64 hca_va;
+	__u32 pd_handle;
+	__u32 access_flags;
+};
+
+struct ib_uverbs_rereg_mr_resp {
+	__u32 lkey;
+	__u32 rkey;
+};
+
 struct ib_uverbs_dereg_mr {
 	__u32 mr_handle;
 };
-- 
cgit v1.2.3


From 7678d71fb4bd6abe6ccb46afe7d90b3ed01ee936 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 3 Jun 2014 14:56:57 +0200
Subject: drm/tegra: Add SET/GET_TILING IOCTLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the tiling parameters of buffer objects can only be set at
allocation time, and only a single tiled mode is supported. This new
DRM_TEGRA_GEM_SET_TILING IOCTL allows more modes to be set and also
allows the tiling mode to be changed after the allocation. This will
enable the Tegra DRM driver to import buffers from a GPU and directly
scan them out by configuring the display controller appropriately.

To complement this, the DRM_TEGRA_GEM_GET_TILING IOCTL can query the
current tiling mode of a buffer object. This is necessary when importing
buffers via handle (as is done in Mesa for example) so that userspace
can determine the proper parameters for the 2D or 3D engines.

Reviewed-by: Stéphane Marchesin <marcheu@chromium.org>
Tested-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c  | 95 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/tegra_drm.h | 25 ++++++++++++
 2 files changed, 120 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index fd736efd14bd..a029525b598f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -455,6 +455,99 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
 
 	return 0;
 }
+
+static int tegra_gem_set_tiling(struct drm_device *drm, void *data,
+				struct drm_file *file)
+{
+	struct drm_tegra_gem_set_tiling *args = data;
+	enum tegra_bo_tiling_mode mode;
+	struct drm_gem_object *gem;
+	unsigned long value = 0;
+	struct tegra_bo *bo;
+
+	switch (args->mode) {
+	case DRM_TEGRA_GEM_TILING_MODE_PITCH:
+		mode = TEGRA_BO_TILING_MODE_PITCH;
+
+		if (args->value != 0)
+			return -EINVAL;
+
+		break;
+
+	case DRM_TEGRA_GEM_TILING_MODE_TILED:
+		mode = TEGRA_BO_TILING_MODE_TILED;
+
+		if (args->value != 0)
+			return -EINVAL;
+
+		break;
+
+	case DRM_TEGRA_GEM_TILING_MODE_BLOCK:
+		mode = TEGRA_BO_TILING_MODE_BLOCK;
+
+		if (args->value > 5)
+			return -EINVAL;
+
+		value = args->value;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	gem = drm_gem_object_lookup(drm, file, args->handle);
+	if (!gem)
+		return -ENOENT;
+
+	bo = to_tegra_bo(gem);
+
+	bo->tiling.mode = mode;
+	bo->tiling.value = value;
+
+	drm_gem_object_unreference(gem);
+
+	return 0;
+}
+
+static int tegra_gem_get_tiling(struct drm_device *drm, void *data,
+				struct drm_file *file)
+{
+	struct drm_tegra_gem_get_tiling *args = data;
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+	int err = 0;
+
+	gem = drm_gem_object_lookup(drm, file, args->handle);
+	if (!gem)
+		return -ENOENT;
+
+	bo = to_tegra_bo(gem);
+
+	switch (bo->tiling.mode) {
+	case TEGRA_BO_TILING_MODE_PITCH:
+		args->mode = DRM_TEGRA_GEM_TILING_MODE_PITCH;
+		args->value = 0;
+		break;
+
+	case TEGRA_BO_TILING_MODE_TILED:
+		args->mode = DRM_TEGRA_GEM_TILING_MODE_TILED;
+		args->value = 0;
+		break;
+
+	case TEGRA_BO_TILING_MODE_BLOCK:
+		args->mode = DRM_TEGRA_GEM_TILING_MODE_BLOCK;
+		args->value = bo->tiling.value;
+		break;
+
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	drm_gem_object_unreference(gem);
+
+	return err;
+}
 #endif
 
 static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
@@ -469,6 +562,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_get_syncpt, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_submit, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT_BASE, tegra_get_syncpt_base, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_TILING, tegra_gem_set_tiling, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_TILING, tegra_gem_get_tiling, DRM_UNLOCKED),
 #endif
 };
 
diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index b75482112428..0829f75eb986 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -129,6 +129,27 @@ struct drm_tegra_submit {
 	__u32 reserved[5];	/* future expansion */
 };
 
+#define DRM_TEGRA_GEM_TILING_MODE_PITCH 0
+#define DRM_TEGRA_GEM_TILING_MODE_TILED 1
+#define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2
+
+struct drm_tegra_gem_set_tiling {
+	/* input */
+	__u32 handle;
+	__u32 mode;
+	__u32 value;
+	__u32 pad;
+};
+
+struct drm_tegra_gem_get_tiling {
+	/* input */
+	__u32 handle;
+	/* output */
+	__u32 mode;
+	__u32 value;
+	__u32 pad;
+};
+
 #define DRM_TEGRA_GEM_CREATE		0x00
 #define DRM_TEGRA_GEM_MMAP		0x01
 #define DRM_TEGRA_SYNCPT_READ		0x02
@@ -139,6 +160,8 @@ struct drm_tegra_submit {
 #define DRM_TEGRA_GET_SYNCPT		0x07
 #define DRM_TEGRA_SUBMIT		0x08
 #define DRM_TEGRA_GET_SYNCPT_BASE	0x09
+#define DRM_TEGRA_GEM_SET_TILING	0x0a
+#define DRM_TEGRA_GEM_GET_TILING	0x0b
 
 #define DRM_IOCTL_TEGRA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE, struct drm_tegra_gem_create)
 #define DRM_IOCTL_TEGRA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP, struct drm_tegra_gem_mmap)
@@ -150,5 +173,7 @@ struct drm_tegra_submit {
 #define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
 #define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)
+#define DRM_IOCTL_TEGRA_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_TILING, struct drm_tegra_gem_set_tiling)
+#define DRM_IOCTL_TEGRA_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_TILING, struct drm_tegra_gem_get_tiling)
 
 #endif
-- 
cgit v1.2.3


From 7b129087874b925901def7ae507f7d9fac406211 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 10 Jun 2014 12:04:03 +0200
Subject: drm/tegra: Add SET/GET_FLAGS IOCTLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The DRM_TEGRA_GEM_SET_FLAGS IOCTL can be used to set the flags of a
buffer object after it has been allocated or imported. Flags associated
with a buffer object can be queried using the DRM_TEGRA_GEM_GET_FLAGS
IOCTL.

Reviewed-by: Stéphane Marchesin <marcheu@chromium.org>
Tested-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c  | 49 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/tegra_drm.h | 21 +++++++++++++++++++
 2 files changed, 70 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index a029525b598f..6e2e714a0649 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -548,6 +548,53 @@ static int tegra_gem_get_tiling(struct drm_device *drm, void *data,
 
 	return err;
 }
+
+static int tegra_gem_set_flags(struct drm_device *drm, void *data,
+			       struct drm_file *file)
+{
+	struct drm_tegra_gem_set_flags *args = data;
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+
+	if (args->flags & ~DRM_TEGRA_GEM_FLAGS)
+		return -EINVAL;
+
+	gem = drm_gem_object_lookup(drm, file, args->handle);
+	if (!gem)
+		return -ENOENT;
+
+	bo = to_tegra_bo(gem);
+	bo->flags = 0;
+
+	if (args->flags & DRM_TEGRA_GEM_BOTTOM_UP)
+		bo->flags |= TEGRA_BO_BOTTOM_UP;
+
+	drm_gem_object_unreference(gem);
+
+	return 0;
+}
+
+static int tegra_gem_get_flags(struct drm_device *drm, void *data,
+			       struct drm_file *file)
+{
+	struct drm_tegra_gem_get_flags *args = data;
+	struct drm_gem_object *gem;
+	struct tegra_bo *bo;
+
+	gem = drm_gem_object_lookup(drm, file, args->handle);
+	if (!gem)
+		return -ENOENT;
+
+	bo = to_tegra_bo(gem);
+	args->flags = 0;
+
+	if (bo->flags & TEGRA_BO_BOTTOM_UP)
+		args->flags |= DRM_TEGRA_GEM_BOTTOM_UP;
+
+	drm_gem_object_unreference(gem);
+
+	return 0;
+}
 #endif
 
 static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
@@ -564,6 +611,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT_BASE, tegra_get_syncpt_base, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_TILING, tegra_gem_set_tiling, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_TILING, tegra_gem_get_tiling, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_FLAGS, tegra_gem_set_flags, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_FLAGS, tegra_gem_get_flags, DRM_UNLOCKED),
 #endif
 };
 
diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index 0829f75eb986..c15d781ecc0f 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -150,6 +150,23 @@ struct drm_tegra_gem_get_tiling {
 	__u32 pad;
 };
 
+#define DRM_TEGRA_GEM_BOTTOM_UP		(1 << 0)
+#define DRM_TEGRA_GEM_FLAGS		(DRM_TEGRA_GEM_BOTTOM_UP)
+
+struct drm_tegra_gem_set_flags {
+	/* input */
+	__u32 handle;
+	/* output */
+	__u32 flags;
+};
+
+struct drm_tegra_gem_get_flags {
+	/* input */
+	__u32 handle;
+	/* output */
+	__u32 flags;
+};
+
 #define DRM_TEGRA_GEM_CREATE		0x00
 #define DRM_TEGRA_GEM_MMAP		0x01
 #define DRM_TEGRA_SYNCPT_READ		0x02
@@ -162,6 +179,8 @@ struct drm_tegra_gem_get_tiling {
 #define DRM_TEGRA_GET_SYNCPT_BASE	0x09
 #define DRM_TEGRA_GEM_SET_TILING	0x0a
 #define DRM_TEGRA_GEM_GET_TILING	0x0b
+#define DRM_TEGRA_GEM_SET_FLAGS		0x0c
+#define DRM_TEGRA_GEM_GET_FLAGS		0x0d
 
 #define DRM_IOCTL_TEGRA_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE, struct drm_tegra_gem_create)
 #define DRM_IOCTL_TEGRA_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP, struct drm_tegra_gem_mmap)
@@ -175,5 +194,7 @@ struct drm_tegra_gem_get_tiling {
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)
 #define DRM_IOCTL_TEGRA_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_TILING, struct drm_tegra_gem_set_tiling)
 #define DRM_IOCTL_TEGRA_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_TILING, struct drm_tegra_gem_get_tiling)
+#define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags)
+#define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags)
 
 #endif
-- 
cgit v1.2.3


From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Tue, 10 Jun 2014 11:41:57 +1000
Subject: drivers/vfio: EEH support for VFIO PCI device

The patch adds new IOCTL commands for sPAPR VFIO container device
to support EEH functionality for PCI devices, which have been passed
through from host to somebody else via VFIO.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/vfio.txt              | 87 +++++++++++++++++++++++++++++++++++--
 drivers/vfio/Makefile               |  1 +
 drivers/vfio/pci/vfio_pci.c         | 18 ++++++--
 drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++-
 drivers/vfio/vfio_spapr_eeh.c       | 87 +++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h                | 23 ++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++++
 7 files changed, 259 insertions(+), 8 deletions(-)
 create mode 100644 drivers/vfio/vfio_spapr_eeh.c

(limited to 'include/uapi')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
 an excellent performance which has limitations such as inability to do
 locked pages accounting in real time.
 
-So 3 additional ioctls have been added:
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
 
 	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
 		of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
 
 	VFIO_IOMMU_DISABLE - disables the container.
 
+	VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
 
 The code flow from the example above should be slightly changed:
 
+	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
 	.....
 	/* Add the group to the container */
 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
 	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-
 	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-	.....
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	....
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+	/* Make sure EEH is supported */
+	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+	/* Enable the EEH functionality on the device */
+	pe_op.op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* You're suggested to create additional data struct to represent
+	 * PE, and put child devices belonging to same IOMMU group to the
+	 * PE instance for later reference.
+	 */
+
+	/* Check the PE's state and make sure it's in functional state */
+	pe_op.op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Save device state using pci_save_state().
+	 * EEH should be enabled on the specified device.
+	 */
+
+	....
+
+	/* When 0xFF's returned from reading PCI config space or IO BARs
+	 * of the PCI device. Check the PE's state to see if that has been
+	 * frozen.
+	 */
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Waiting for pending PCI transactions to be completed and don't
+	 * produce any more PCI traffic from/to the affected PE until
+	 * recovery is finished.
+	 */
+
+	/* Enable IO for the affected PE and collect logs. Usually, the
+	 * standard part of PCI config space, AER registers are dumped
+	 * as logs for further analysis.
+	 */
+	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/*
+	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
+	 * is enough. However, the firmware of some PCI adapters would
+	 * require fundamental reset.
+	 */
+	pe_op.op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Configure the PCI bridges for the affected PE */
+	pe_op.op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Restored state we saved at initialization time. pci_restore_state()
+	 * is good enough as an example.
+	 */
+
+	/* Hopefully, error is recovered successfully. Now, you can resume to
+	 * start PCI traffic to/from the affected PE.
+	 */
+
+	....
 
 -------------------------------------------------------------------------------
 
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
 
-	if (atomic_dec_and_test(&vdev->refcnt))
+	if (atomic_dec_and_test(&vdev->refcnt)) {
+		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
+	}
 
 	module_put(THIS_MODULE);
 }
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
 static int vfio_pci_open(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
+	int ret;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
 	if (atomic_inc_return(&vdev->refcnt) == 1) {
-		int ret = vfio_pci_enable(vdev);
+		ret = vfio_pci_enable(vdev);
+		if (ret)
+			goto error;
+
+		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
 		if (ret) {
-			module_put(THIS_MODULE);
-			return ret;
+			vfio_pci_disable(vdev);
+			goto error;
 		}
 	}
 
 	return 0;
+error:
+	module_put(THIS_MODULE);
+	return ret;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	switch (cmd) {
 	case VFIO_CHECK_EXTENSION:
-		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+		switch (arg) {
+		case VFIO_SPAPR_TCE_IOMMU:
+			ret = 1;
+			break;
+		default:
+			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+			break;
+		}
+
+		return (ret < 0) ? 0 : ret;
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
+	case VFIO_EEH_PE_OP:
+		if (!container->tbl || !container->tbl->it_group)
+			return -ENODEV;
+
+		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
+						  cmd, arg);
 	}
 
 	return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
+/*
+ * EEH functionality support for VFIO devices. The feature is only
+ * available on sPAPR compatible platforms.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <asm/eeh.h>
+
+/* We might build address mapping here for "fast" path later */
+int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return eeh_dev_open(pdev);
+}
+
+void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+	eeh_dev_release(pdev);
+}
+
+long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				unsigned int cmd, unsigned long arg)
+{
+	struct eeh_pe *pe;
+	struct vfio_eeh_pe_op op;
+	unsigned long minsz;
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION:
+		if (arg == VFIO_EEH)
+			ret = eeh_enabled() ? 1 : 0;
+		else
+			ret = 0;
+		break;
+	case VFIO_EEH_PE_OP:
+		pe = eeh_iommu_group_to_pe(group);
+		if (!pe)
+			return -ENODEV;
+
+		minsz = offsetofend(struct vfio_eeh_pe_op, op);
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+		if (op.argsz < minsz || op.flags)
+			return -EINVAL;
+
+		switch (op.op) {
+		case VFIO_EEH_PE_DISABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+			break;
+		case VFIO_EEH_PE_ENABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_IO:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_DMA:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+			break;
+		case VFIO_EEH_PE_GET_STATE:
+			ret = eeh_pe_get_state(pe);
+			break;
+		case VFIO_EEH_PE_RESET_DEACTIVATE:
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+			break;
+		case VFIO_EEH_PE_RESET_HOT:
+			ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+			break;
+		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+			break;
+		case VFIO_EEH_PE_CONFIGURE:
+			ret = eeh_pe_configure(pe);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+#ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
+extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				       unsigned int cmd,
+				       unsigned long arg);
+#else
+static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return 0;
+}
+
+static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+}
+
+static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+					      unsigned int cmd,
+					      unsigned long arg)
+{
+	return -ENOTTY;
+}
+#endif /* CONFIG_EEH */
 #endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
  */
 #define VFIO_DMA_CC_IOMMU		4
 
+/* Check if EEH is supported */
+#define VFIO_EEH			5
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
 
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE.
+ */
+struct vfio_eeh_pe_op {
+	__u32 argsz;
+	__u32 flags;
+	__u32 op;
+};
+
+#define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE		1	/* Enable EEH functionality  */
+#define VFIO_EEH_PE_UNFREEZE_IO		2	/* Enable IO for frozen PE   */
+#define VFIO_EEH_PE_UNFREEZE_DMA	3	/* Enable DMA for frozen PE  */
+#define VFIO_EEH_PE_GET_STATE		4	/* PE state retrieval        */
+#define  VFIO_EEH_PE_STATE_NORMAL	0	/* PE in functional state    */
+#define  VFIO_EEH_PE_STATE_RESET	1	/* PE reset in progress      */
+#define  VFIO_EEH_PE_STATE_STOPPED	2	/* Stopped DMA and IO        */
+#define  VFIO_EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only          */
+#define  VFIO_EEH_PE_STATE_UNAVAIL	5	/* State unavailable         */
+#define VFIO_EEH_PE_RESET_DEACTIVATE	5	/* Deassert PE reset         */
+#define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
+#define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+
+#define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
cgit v1.2.3


From 77497f2735ad6e29c55475e15e9790dbfa2c2ef8 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer@amd.com>
Date: Thu, 17 Jul 2014 19:01:07 +0900
Subject: drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r100.c        |  2 +-
 drivers/gpu/drm/radeon/r300.c        | 12 +++++++++---
 drivers/gpu/drm/radeon/radeon.h      | 12 +++++++++---
 drivers/gpu/drm/radeon/radeon_asic.h |  8 ++++----
 drivers/gpu/drm/radeon/radeon_gart.c |  9 ++++++---
 drivers/gpu/drm/radeon/radeon_ttm.c  |  8 ++++++--
 drivers/gpu/drm/radeon/rs400.c       | 13 ++++++++++---
 drivers/gpu/drm/radeon/rs600.c       | 16 +++++++++++-----
 include/uapi/drm/radeon_drm.h        |  4 +++-
 9 files changed, 59 insertions(+), 25 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index cceef2711310..5fd242795178 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -682,7 +682,7 @@ void r100_pci_gart_disable(struct radeon_device *rdev)
 }
 
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr)
+			    uint64_t addr, uint32_t flags)
 {
 	u32 *gtt = rdev->gart.ptr;
 	gtt[i] = cpu_to_le32(lower_32_bits(addr));
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 8d14e665f241..75b30338c226 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -69,17 +69,23 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
 	mb();
 }
 
+#define R300_PTE_UNSNOOPED (1 << 0)
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
 void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-			      uint64_t addr)
+			      uint64_t addr, uint32_t flags)
 {
 	void __iomem *ptr = rdev->gart.ptr;
 
 	addr = (lower_32_bits(addr) >> 8) |
-	       ((upper_32_bits(addr) & 0xff) << 24) |
-	       R300_PTE_WRITEABLE | R300_PTE_READABLE;
+		((upper_32_bits(addr) & 0xff) << 24);
+	if (flags & RADEON_GART_PAGE_READ)
+		addr |= R300_PTE_READABLE;
+	if (flags & RADEON_GART_PAGE_WRITE)
+		addr |= R300_PTE_WRITEABLE;
+	if (!(flags & RADEON_GART_PAGE_SNOOP))
+		addr |= R300_PTE_UNSNOOPED;
 	/* on x86 we want this to be CPU endian, on powerpc
 	 * on powerpc without HW swappers, it'll get swapped on way
 	 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 43bc99b3926f..b1eea04a329e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -593,6 +593,12 @@ struct radeon_mc;
 #define RADEON_GPU_PAGE_SHIFT 12
 #define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK)
 
+#define RADEON_GART_PAGE_DUMMY  0
+#define RADEON_GART_PAGE_VALID	(1 << 0)
+#define RADEON_GART_PAGE_READ	(1 << 1)
+#define RADEON_GART_PAGE_WRITE	(1 << 2)
+#define RADEON_GART_PAGE_SNOOP	(1 << 3)
+
 struct radeon_gart {
 	dma_addr_t			table_addr;
 	struct radeon_bo		*robj;
@@ -617,7 +623,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 			int pages);
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 		     int pages, struct page **pagelist,
-		     dma_addr_t *dma_addr);
+		     dma_addr_t *dma_addr, uint32_t flags);
 
 
 /*
@@ -1784,7 +1790,7 @@ struct radeon_asic {
 	struct {
 		void (*tlb_flush)(struct radeon_device *rdev);
 		void (*set_page)(struct radeon_device *rdev, unsigned i,
-				 uint64_t addr);
+				 uint64_t addr, uint32_t flags);
 	} gart;
 	struct {
 		int (*init)(struct radeon_device *rdev);
@@ -2745,7 +2751,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
-#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
+#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 01e7c0ad8f01..f632e31b3554 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -68,7 +68,7 @@ int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr);
+			    uint64_t addr, uint32_t flags);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -173,7 +173,7 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev,
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
 extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-				     uint64_t addr);
+				     uint64_t addr, uint32_t flags);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -209,7 +209,7 @@ extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
 void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr);
+			 uint64_t addr, uint32_t flags);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -233,7 +233,7 @@ void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
 void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr);
+			 uint64_t addr, uint32_t flags);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index b7d3e846cd76..d684642d900b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -243,7 +243,8 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
 				if (rdev->gart.ptr) {
-					radeon_gart_set_page(rdev, t, page_base);
+					radeon_gart_set_page(rdev, t, page_base,
+							     RADEON_GART_PAGE_DUMMY);
 				}
 				page_base += RADEON_GPU_PAGE_SIZE;
 			}
@@ -261,13 +262,15 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
  * @pages: number of pages to bind
  * @pagelist: pages to bind
  * @dma_addr: DMA addresses of pages
+ * @flags: RADEON_GART_PAGE_* flags
  *
  * Binds the requested pages to the gart page table
  * (all asics).
  * Returns 0 for success, -EINVAL for failure.
  */
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
-		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
+		     int pages, struct page **pagelist, dma_addr_t *dma_addr,
+		     uint32_t flags)
 {
 	unsigned t;
 	unsigned p;
@@ -287,7 +290,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 		if (rdev->gart.ptr) {
 			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
-				radeon_gart_set_page(rdev, t, page_base);
+				radeon_gart_set_page(rdev, t, page_base, flags);
 				page_base += RADEON_GPU_PAGE_SIZE;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index c8a8a5144ec1..7fb7c1cc6af3 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -521,6 +521,8 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
 	struct radeon_ttm_tt *gtt = (void*)ttm;
+	uint32_t flags = RADEON_GART_PAGE_VALID | RADEON_GART_PAGE_READ |
+		RADEON_GART_PAGE_WRITE;
 	int r;
 
 	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
@@ -528,8 +530,10 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
 		     ttm->num_pages, bo_mem, ttm);
 	}
-	r = radeon_gart_bind(gtt->rdev, gtt->offset,
-			     ttm->num_pages, ttm->pages, gtt->ttm.dma_address);
+	if (ttm->caching_state == tt_cached)
+		flags |= RADEON_GART_PAGE_SNOOP;
+	r = radeon_gart_bind(gtt->rdev, gtt->offset, ttm->num_pages,
+			     ttm->pages, gtt->ttm.dma_address, flags);
 	if (r) {
 		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
 			  ttm->num_pages, (unsigned)gtt->offset);
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 4519f9c93162..6c1fc339d228 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -208,17 +208,24 @@ void rs400_gart_fini(struct radeon_device *rdev)
 	radeon_gart_table_ram_free(rdev);
 }
 
+#define RS400_PTE_UNSNOOPED (1 << 0)
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr, uint32_t flags)
 {
 	uint32_t entry;
 	u32 *gtt = rdev->gart.ptr;
 
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
-		((upper_32_bits(addr) & 0xff) << 4) |
-		RS400_PTE_WRITEABLE | RS400_PTE_READABLE;
+		((upper_32_bits(addr) & 0xff) << 4);
+	if (flags & RADEON_GART_PAGE_READ)
+		addr |= RS400_PTE_READABLE;
+	if (flags & RADEON_GART_PAGE_WRITE)
+		addr |= RS400_PTE_WRITEABLE;
+	if (!(flags & RADEON_GART_PAGE_SNOOP))
+		entry |= RS400_PTE_UNSNOOPED;
 	entry = cpu_to_le32(entry);
 	gtt[i] = entry;
 }
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 27a56ad3a727..5f6db4629aaa 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -625,15 +625,21 @@ static void rs600_gart_fini(struct radeon_device *rdev)
 	radeon_gart_table_vram_free(rdev);
 }
 
-void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t addr, uint32_t flags)
 {
 	void __iomem *ptr = (void *)rdev->gart.ptr;
 
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
-	if (addr == rdev->dummy_page.addr)
-		addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
-	else
-		addr |= R600_PTE_GART;
+	addr |= R600_PTE_SYSTEM;
+	if (flags & RADEON_GART_PAGE_VALID)
+		addr |= R600_PTE_VALID;
+	if (flags & RADEON_GART_PAGE_READ)
+		addr |= R600_PTE_READABLE;
+	if (flags & RADEON_GART_PAGE_WRITE)
+		addr |= R600_PTE_WRITEABLE;
+	if (flags & RADEON_GART_PAGE_SNOOP)
+		addr |= R600_PTE_SNOOPED;
 	writeq(addr, ptr + (i * 8));
 }
 
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 1cc0b610f162..509b2d7a41b7 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -796,7 +796,9 @@ struct drm_radeon_gem_info {
 	uint64_t	vram_visible;
 };
 
-#define RADEON_GEM_NO_BACKING_STORE 1
+#define RADEON_GEM_NO_BACKING_STORE	(1 << 0)
+#define RADEON_GEM_GTT_UC		(1 << 1)
+#define RADEON_GEM_GTT_WC		(1 << 2)
 
 struct drm_radeon_gem_create {
 	uint64_t	size;
-- 
cgit v1.2.3


From 753a2ad54ef45e3417a9d49537c2b42b04a2e1be Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Thu, 7 Aug 2014 00:17:09 +0200
Subject: net: reallocate new socket option number for IPV6_AUTOFLOWLABEL

cb1ce2e ("ipv6: Implement automatic flow label generation on transmit")
accidentally uses socket option 64, which is already used by ip6tables:

 IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO               64
 IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES       65

There is comment include/uapi/linux/in6.h warning about that.

Allocate 70 for this, which seems to be unused instead.

Cc: Tom Herbert <therbert@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 22b7a69619d8..74a2a1773494 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -233,7 +233,6 @@ struct in6_flowlabel_req {
 #if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
-#define IPV6_AUTOFLOWLABEL	64
 
 /*
  * Netfilter (1)
@@ -262,6 +261,7 @@ struct in6_flowlabel_req {
  * IP6T_SO_ORIGINAL_DST		80
  */
 
+#define IPV6_AUTOFLOWLABEL	70
 /* RFC5014: Source address selection */
 #define IPV6_ADDR_PREFERENCES	72
 
-- 
cgit v1.2.3


From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:27 -0700
Subject: shm: add sealing API

If two processes share a common memory region, they usually want some
guarantees to allow safe access. This often includes:
  - one side cannot overwrite data while the other reads it
  - one side cannot shrink the buffer while the other accesses it
  - one side cannot grow the buffer beyond previously set boundaries

If there is a trust-relationship between both parties, there is no need
for policy enforcement.  However, if there's no trust relationship (eg.,
for general-purpose IPC) sharing memory-regions is highly fragile and
often not possible without local copies.  Look at the following two
use-cases:

  1) A graphics client wants to share its rendering-buffer with a
     graphics-server. The memory-region is allocated by the client for
     read/write access and a second FD is passed to the server. While
     scanning out from the memory region, the server has no guarantee that
     the client doesn't shrink the buffer at any time, requiring rather
     cumbersome SIGBUS handling.
  2) A process wants to perform an RPC on another process. To avoid huge
     bandwidth consumption, zero-copy is preferred. After a message is
     assembled in-memory and a FD is passed to the remote side, both sides
     want to be sure that neither modifies this shared copy, anymore. The
     source may have put sensible data into the message without a separate
     copy and the target may want to parse the message inline, to avoid a
     local copy.

While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide
ways to achieve most of this, the first one is unproportionally ugly to
use in libraries and the latter two are broken/racy or even disabled due
to denial of service attacks.

This patch introduces the concept of SEALING.  If you seal a file, a
specific set of operations is blocked on that file forever.  Unlike locks,
seals can only be set, never removed.  Hence, once you verified a specific
set of seals is set, you're guaranteed that no-one can perform the blocked
operations on this file, anymore.

An initial set of SEALS is introduced by this patch:
  - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced
            in size. This affects ftruncate() and open(O_TRUNC).
  - GROW: If SEAL_GROW is set, the file in question cannot be increased
          in size. This affects ftruncate(), fallocate() and write().
  - WRITE: If SEAL_WRITE is set, no write operations (besides resizing)
           are possible. This affects fallocate(PUNCH_HOLE), mmap() and
           write().
  - SEAL: If SEAL_SEAL is set, no further seals can be added to a file.
          This basically prevents the F_ADD_SEAL operation on a file and
          can be set to prevent others from adding further seals that you
          don't want.

The described use-cases can easily use these seals to provide safe use
without any trust-relationship:

  1) The graphics server can verify that a passed file-descriptor has
     SEAL_SHRINK set. This allows safe scanout, while the client is
     allowed to increase buffer size for window-resizing on-the-fly.
     Concurrent writes are explicitly allowed.
  2) For general-purpose IPC, both processes can verify that SEAL_SHRINK,
     SEAL_GROW and SEAL_WRITE are set. This guarantees that neither
     process can modify the data while the other side parses it.
     Furthermore, it guarantees that even with writable FDs passed to the
     peer, it cannot increase the size to hit memory-limits of the source
     process (in case the file-storage is accounted to the source).

The new API is an extension to fcntl(), adding two new commands:
  F_GET_SEALS: Return a bitset describing the seals on the file. This
               can be called on any FD if the underlying file supports
               sealing.
  F_ADD_SEALS: Change the seals of a given file. This requires WRITE
               access to the file and F_SEAL_SEAL may not already be set.
               Furthermore, the underlying file must support sealing and
               there may not be any existing shared mapping of that file.
               Otherwise, EBADF/EPERM is returned.
               The given seals are _added_ to the existing set of seals
               on the file. You cannot remove seals again.

The fcntl() handler is currently specific to shmem and disabled on all
files. A file needs to explicitly support sealing for this interface to
work. A separate syscall is added in a follow-up, which creates files that
support sealing. There is no intention to support this on other
file-systems. Semantics are unclear for non-volatile files and we lack any
use-case right now. Therefore, the implementation is specific to shmem.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fcntl.c                 |   5 ++
 include/linux/shmem_fs.h   |  17 ++++++
 include/uapi/linux/fcntl.h |  15 +++++
 mm/shmem.c                 | 143 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 180 insertions(+)

(limited to 'include/uapi')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GETPIPE_SZ:
 		err = pipe_fcntl(filp, cmd, arg);
 		break;
+	case F_ADD_SEALS:
+	case F_GET_SEALS:
+		err = shmem_fcntl(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f..50777b5b1e4c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -1,6 +1,7 @@
 #ifndef __SHMEM_FS_H
 #define __SHMEM_FS_H
 
+#include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
 #include <linux/pagemap.h>
@@ -11,6 +12,7 @@
 
 struct shmem_inode_info {
 	spinlock_t		lock;
+	unsigned int		seals;		/* shmem seals */
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
 					mapping_gfp_mask(mapping));
 }
 
+#ifdef CONFIG_TMPFS
+
+extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern int shmem_get_seals(struct file *file);
+extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+{
+	return -EINVAL;
+}
+
+#endif
+
 #endif
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0..beed138bd359 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -27,6 +27,21 @@
 #define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
 #define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
 
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
 /*
  * Types of directory notifications that may be requested.
  */
diff --git a/mm/shmem.c b/mm/shmem.c
index 6dc80d298f9d..8b43bb7a4efe 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/fcntl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	int error;
 
 	error = inode_change_ok(inode, attr);
@@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 		loff_t oldsize = inode->i_size;
 		loff_t newsize = attr->ia_size;
 
+		/* protected by i_mutex */
+		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
+			return -EPERM;
+
 		if (newsize != oldsize) {
 			error = shmem_reacct_size(SHMEM_I(inode)->flags,
 					oldsize, newsize);
@@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
 		spin_lock_init(&info->lock);
+		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
@@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+	/* i_mutex is held by caller */
+	if (unlikely(info->seals)) {
+		if (info->seals & F_SEAL_WRITE)
+			return -EPERM;
+		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
+			return -EPERM;
+	}
+
 	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static int shmem_wait_for_pins(struct address_space *mapping)
+{
+	return 0;
+}
+
+#define F_ALL_SEALS (F_SEAL_SEAL | \
+		     F_SEAL_SHRINK | \
+		     F_SEAL_GROW | \
+		     F_SEAL_WRITE)
+
+int shmem_add_seals(struct file *file, unsigned int seals)
+{
+	struct inode *inode = file_inode(file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	int error;
+
+	/*
+	 * SEALING
+	 * Sealing allows multiple parties to share a shmem-file but restrict
+	 * access to a specific subset of file operations. Seals can only be
+	 * added, but never removed. This way, mutually untrusted parties can
+	 * share common memory regions with a well-defined policy. A malicious
+	 * peer can thus never perform unwanted operations on a shared object.
+	 *
+	 * Seals are only supported on special shmem-files and always affect
+	 * the whole underlying inode. Once a seal is set, it may prevent some
+	 * kinds of access to the file. Currently, the following seals are
+	 * defined:
+	 *   SEAL_SEAL: Prevent further seals from being set on this file
+	 *   SEAL_SHRINK: Prevent the file from shrinking
+	 *   SEAL_GROW: Prevent the file from growing
+	 *   SEAL_WRITE: Prevent write access to the file
+	 *
+	 * As we don't require any trust relationship between two parties, we
+	 * must prevent seals from being removed. Therefore, sealing a file
+	 * only adds a given set of seals to the file, it never touches
+	 * existing seals. Furthermore, the "setting seals"-operation can be
+	 * sealed itself, which basically prevents any further seal from being
+	 * added.
+	 *
+	 * Semantics of sealing are only defined on volatile files. Only
+	 * anonymous shmem files support sealing. More importantly, seals are
+	 * never written to disk. Therefore, there's no plan to support it on
+	 * other file types.
+	 */
+
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EPERM;
+	if (seals & ~(unsigned int)F_ALL_SEALS)
+		return -EINVAL;
+
+	mutex_lock(&inode->i_mutex);
+
+	if (info->seals & F_SEAL_SEAL) {
+		error = -EPERM;
+		goto unlock;
+	}
+
+	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+		error = mapping_deny_writable(file->f_mapping);
+		if (error)
+			goto unlock;
+
+		error = shmem_wait_for_pins(file->f_mapping);
+		if (error) {
+			mapping_allow_writable(file->f_mapping);
+			goto unlock;
+		}
+	}
+
+	info->seals |= seals;
+	error = 0;
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(shmem_add_seals);
+
+int shmem_get_seals(struct file *file)
+{
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+
+	return SHMEM_I(file_inode(file))->seals;
+}
+EXPORT_SYMBOL_GPL(shmem_get_seals);
+
+long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long error;
+
+	switch (cmd) {
+	case F_ADD_SEALS:
+		/* disallow upper 32bit */
+		if (arg > UINT_MAX)
+			return -EINVAL;
+
+		error = shmem_add_seals(file, arg);
+		break;
+	case F_GET_SEALS:
+		error = shmem_get_seals(file);
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+	return error;
+}
+
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 							 loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_falloc shmem_falloc;
 	pgoff_t start, index, end;
 	int error;
@@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
 		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+		/* protected by i_mutex */
+		if (info->seals & F_SEAL_WRITE) {
+			error = -EPERM;
+			goto out;
+		}
+
 		shmem_falloc.waitq = &shmem_falloc_waitq;
 		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
 		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	if (error)
 		goto out;
 
+	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
+		error = -EPERM;
+		goto out;
+	}
+
 	start = offset >> PAGE_CACHE_SHIFT;
 	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	/* Try to avoid a swapstorm if len is impossible to satisfy */
-- 
cgit v1.2.3


From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:29 -0700
Subject: shm: add memfd_create() syscall

memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor
that you can pass to mmap().  It can support sealing and avoids any
connection to user-visible mount-points.  Thus, it's not subject to quotas
on mounted file-systems, but can be used like malloc()'ed memory, but with
a file-descriptor to it.

memfd_create() returns the raw shmem file, so calls like ftruncate() can
be used to modify the underlying inode.  Also calls like fstat() will
return proper information and mark the file as regular file.  If you want
sealing, you can specify MFD_ALLOW_SEALING.  Otherwise, sealing is not
supported (like on all other regular files).

Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not
subject to a filesystem size limit.  It is still properly accounted to
memcg limits, though, and to the same overcommit or no-overcommit
accounting as all user memory.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/syscalls/syscall_32.tbl |  1 +
 arch/x86/syscalls/syscall_64.tbl |  1 +
 include/linux/syscalls.h         |  1 +
 include/uapi/linux/memfd.h       |  8 +++++
 kernel/sys_ni.c                  |  1 +
 mm/shmem.c                       | 73 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 85 insertions(+)
 create mode 100644 include/uapi/linux/memfd.h

(limited to 'include/uapi')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d1b4a119d4a5..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -362,3 +362,4 @@
 353	i386	renameat2		sys_renameat2
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
+356	i386	memfd_create		sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 252c804bb1aa..ca2b9aa78c81 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,6 +325,7 @@
 316	common	renameat2		sys_renameat2
 317	common	seccomp			sys_seccomp
 318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899..15a069425cbf 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_eventfd2(unsigned int count, int flags);
+asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
 asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..534e364bda92
--- /dev/null
+++ b/include/uapi/linux/memfd.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MEMFD_H
+#define _UAPI_LINUX_MEMFD_H
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC		0x0001U
+#define MFD_ALLOW_SEALING	0x0002U
+
+#endif /* _UAPI_LINUX_MEMFD_H */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..1f79e3714533 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+cond_syscall(sys_memfd_create);
 
 /* performance counters: */
 cond_syscall(sys_perf_event_open);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8b43bb7a4efe..4a5498795a2b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/syscalls.h>
 #include <linux/fcntl.h>
+#include <uapi/linux/memfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
+
+#define MFD_NAME_PREFIX "memfd:"
+#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
+#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
+
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+
+SYSCALL_DEFINE2(memfd_create,
+		const char __user *, uname,
+		unsigned int, flags)
+{
+	struct shmem_inode_info *info;
+	struct file *file;
+	int fd, error;
+	char *name;
+	long len;
+
+	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+		return -EINVAL;
+
+	/* length includes terminating zero */
+	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
+	if (len <= 0)
+		return -EFAULT;
+	if (len > MFD_NAME_MAX_LEN + 1)
+		return -EINVAL;
+
+	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
+	if (!name)
+		return -ENOMEM;
+
+	strcpy(name, MFD_NAME_PREFIX);
+	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	/* terminating-zero may have changed after strnlen_user() returned */
+	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
+	if (fd < 0) {
+		error = fd;
+		goto err_name;
+	}
+
+	file = shmem_file_setup(name, 0, VM_NORESERVE);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto err_fd;
+	}
+	info = SHMEM_I(file_inode(file));
+	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+	file->f_flags |= O_RDWR | O_LARGEFILE;
+	if (flags & MFD_ALLOW_SEALING)
+		info->seals &= ~F_SEAL_SEAL;
+
+	fd_install(fd, file);
+	kfree(name);
+	return fd;
+
+err_fd:
+	put_unused_fd(fd);
+err_name:
+	kfree(name);
+	return error;
+}
+
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
-- 
cgit v1.2.3


From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:57 -0700
Subject: kexec: implementation of new syscall kexec_file_load

Previous patch provided the interface definition and this patch prvides
implementation of new syscall.

Previously segment list was prepared in user space.  Now user space just
passes kernel fd, initrd fd and command line and kernel will create a
segment list internally.

This patch contains generic part of the code.  Actual segment preparation
and loading is done by arch and image specific loader.  Which comes in
next patch.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/machine_kexec_64.c |  45 ++++
 include/linux/kexec.h              |  53 ++++
 include/uapi/linux/kexec.h         |  11 +
 kernel/kexec.c                     | 483 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 587 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..c8875b5545e1 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,10 @@
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
 
+static struct kexec_file_ops *kexec_file_loaders[] = {
+		NULL,
+};
+
 static void free_transition_pgtable(struct kimage *image)
 {
 	free_page((unsigned long)image->arch.pud);
@@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void)
 			      (unsigned long)&_text - __START_KERNEL);
 }
 
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	int i, ret = -ENOEXEC;
+	struct kexec_file_ops *fops;
+
+	for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+		fops = kexec_file_loaders[i];
+		if (!fops || !fops->probe)
+			continue;
+
+		ret = fops->probe(buf, buf_len);
+		if (!ret) {
+			image->fops = fops;
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+	if (!image->fops || !image->fops->load)
+		return ERR_PTR(-ENOEXEC);
+
+	return image->fops->load(image, image->kernel_buf,
+				 image->kernel_buf_len, image->initrd_buf,
+				 image->initrd_buf_len, image->cmdline_buf,
+				 image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	if (!image->fops || !image->fops->cleanup)
+		return 0;
+
+	return image->fops->cleanup(image);
+}
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 66d56ac0f64c..8e80901e466f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -121,13 +121,57 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
 	unsigned int preserve_context : 1;
+	/* If set, we are using file mode kexec syscall */
+	unsigned int file_mode:1;
 
 #ifdef ARCH_HAS_KIMAGE_ARCH
 	struct kimage_arch arch;
 #endif
+
+	/* Additional fields for file based kexec syscall */
+	void *kernel_buf;
+	unsigned long kernel_buf_len;
+
+	void *initrd_buf;
+	unsigned long initrd_buf_len;
+
+	char *cmdline_buf;
+	unsigned long cmdline_buf_len;
+
+	/* File operations provided by image loader */
+	struct kexec_file_ops *fops;
+
+	/* Image loader handling the kernel can store a pointer here */
+	void *image_loader_data;
 };
 
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+	struct kimage *image;
+	char *buffer;
+	unsigned long bufsz;
+	unsigned long memsz;
+	unsigned long buf_align;
+	unsigned long buf_min;
+	unsigned long buf_max;
+	bool top_down;		/* allocate from top of memory hole */
+};
 
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+			     unsigned long kernel_len, char *initrd,
+			     unsigned long initrd_len, char *cmdline,
+			     unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(struct kimage *image);
+
+struct kexec_file_ops {
+	kexec_probe_t *probe;
+	kexec_load_t *load;
+	kexec_cleanup_t *cleanup;
+};
 
 /* kexec interface functions */
 extern void machine_kexec(struct kimage *image);
@@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
 extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+			    unsigned long bufsz, unsigned long memsz,
+			    unsigned long buf_align, unsigned long buf_min,
+			    unsigned long buf_max, bool top_down,
+			    unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
@@ -188,6 +237,10 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif
 
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+				 KEXEC_FILE_NO_INITRAMFS)
+
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
 #define KEXEC_PRESERVE_CONTEXT	0x00000002
 #define KEXEC_ARCH_MASK		0xffff0000
 
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ *                           fd field.
+ */
+#define KEXEC_FILE_UNLOAD	0x00000001
+#define KEXEC_FILE_ON_CRASH	0x00000002
+#define KEXEC_FILE_NO_INITRAMFS	0x00000004
+
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
  */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec4386c1b94f..9b46219254dd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	"kexec: " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -327,6 +329,221 @@ out_free_image:
 	return ret;
 }
 
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+	struct fd f = fdget(fd);
+	int ret;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	if (!f.file)
+		return -EBADF;
+
+	ret = vfs_getattr(&f.file->f_path, &stat);
+	if (ret)
+		goto out;
+
+	if (stat.size > INT_MAX) {
+		ret = -EFBIG;
+		goto out;
+	}
+
+	/* Don't hand 0 to vmalloc, it whines. */
+	if (stat.size == 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*buf = vmalloc(stat.size);
+	if (!*buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(*buf);
+			ret = bytes;
+			goto out;
+		}
+
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+
+	if (pos != stat.size) {
+		ret = -EBADF;
+		vfree(*buf);
+		goto out;
+	}
+
+	*buf_len = pos;
+out:
+	fdput(f);
+	return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+					 unsigned long buf_len)
+{
+	return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+	return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->kernel_buf);
+	image->kernel_buf = NULL;
+
+	vfree(image->initrd_buf);
+	image->initrd_buf = NULL;
+
+	kfree(image->cmdline_buf);
+	image->cmdline_buf = NULL;
+
+	/* See if architecture has anything to cleanup post load */
+	arch_kimage_file_post_load_cleanup(image);
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+			     const char __user *cmdline_ptr,
+			     unsigned long cmdline_len, unsigned flags)
+{
+	int ret = 0;
+	void *ldata;
+
+	ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+				&image->kernel_buf_len);
+	if (ret)
+		return ret;
+
+	/* Call arch image probe handlers */
+	ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+					    image->kernel_buf_len);
+
+	if (ret)
+		goto out;
+
+	/* It is possible that there no initramfs is being loaded */
+	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+		ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+					&image->initrd_buf_len);
+		if (ret)
+			goto out;
+	}
+
+	if (cmdline_len) {
+		image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+		if (!image->cmdline_buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+				     cmdline_len);
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		image->cmdline_buf_len = cmdline_len;
+
+		/* command line should be a string with last byte null */
+		if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Call arch image load handlers */
+	ldata = arch_kexec_kernel_image_load(image);
+
+	if (IS_ERR(ldata)) {
+		ret = PTR_ERR(ldata);
+		goto out;
+	}
+
+	image->image_loader_data = ldata;
+out:
+	/* In case of error, free up all allocated memory in this function */
+	if (ret)
+		kimage_file_post_load_cleanup(image);
+	return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+		       int initrd_fd, const char __user *cmdline_ptr,
+		       unsigned long cmdline_len, unsigned long flags)
+{
+	int ret;
+	struct kimage *image;
+
+	image = do_kimage_alloc_init();
+	if (!image)
+		return -ENOMEM;
+
+	image->file_mode = 1;
+
+	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+					   cmdline_ptr, cmdline_len, flags);
+	if (ret)
+		goto out_free_image;
+
+	ret = sanity_check_segment_list(image);
+	if (ret)
+		goto out_free_post_load_bufs;
+
+	ret = -ENOMEM;
+	image->control_code_page = kimage_alloc_control_pages(image,
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
+	if (!image->control_code_page) {
+		pr_err("Could not allocate control_code_buffer\n");
+		goto out_free_post_load_bufs;
+	}
+
+	image->swap_page = kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		pr_err(KERN_ERR "Could not allocate swap buffer\n");
+		goto out_free_control_pages;
+	}
+
+	*rimage = image;
+	return 0;
+out_free_control_pages:
+	kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+	kimage_file_post_load_cleanup(image);
+	kfree(image->image_loader_data);
+out_free_image:
+	kfree(image);
+	return ret;
+}
+
 static int kimage_is_destination_range(struct kimage *image,
 					unsigned long start,
 					unsigned long end)
@@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image)
 
 	/* Free the kexec control pages... */
 	kimage_free_page_list(&image->control_pages);
+
+	kfree(image->image_loader_data);
+
+	/*
+	 * Free up any temporary buffers allocated. This might hit if
+	 * error occurred much later after buffer allocation.
+	 */
+	if (image->file_mode)
+		kimage_file_post_load_cleanup(image);
+
 	kfree(image);
 }
 
@@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image,
 	unsigned long maddr;
 	size_t ubytes, mbytes;
 	int result;
-	unsigned char __user *buf;
+	unsigned char __user *buf = NULL;
+	unsigned char *kbuf = NULL;
 
 	result = 0;
-	buf = segment->buf;
+	if (image->file_mode)
+		kbuf = segment->kbuf;
+	else
+		buf = segment->buf;
 	ubytes = segment->bufsz;
 	mbytes = segment->memsz;
 	maddr = segment->mem;
@@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image,
 				PAGE_SIZE - (maddr & ~PAGE_MASK));
 		uchunk = min(ubytes, mchunk);
 
-		result = copy_from_user(ptr, buf, uchunk);
+		/* For file based kexec, source pages are in kernel memory */
+		if (image->file_mode)
+			memcpy(ptr, kbuf, uchunk);
+		else
+			result = copy_from_user(ptr, buf, uchunk);
 		kunmap(page);
 		if (result) {
 			result = -EFAULT;
@@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image,
 		}
 		ubytes -= uchunk;
 		maddr  += mchunk;
-		buf    += mchunk;
+		if (image->file_mode)
+			kbuf += mchunk;
+		else
+			buf += mchunk;
 		mbytes -= mchunk;
 	}
 out:
@@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
 		unsigned long, flags)
 {
-	return -ENOSYS;
+	int ret = 0, i;
+	struct kimage **dest_image, *image;
+
+	/* We only trust the superuser with rebooting the system. */
+	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+		return -EPERM;
+
+	/* Make sure we have a legal set of flags */
+	if (flags != (flags & KEXEC_FILE_FLAGS))
+		return -EINVAL;
+
+	image = NULL;
+
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+
+	dest_image = &kexec_image;
+	if (flags & KEXEC_FILE_ON_CRASH)
+		dest_image = &kexec_crash_image;
+
+	if (flags & KEXEC_FILE_UNLOAD)
+		goto exchange;
+
+	/*
+	 * In case of crash, new kernel gets loaded in reserved region. It is
+	 * same memory where old crash kernel might be loaded. Free any
+	 * current crash dump kernel before we corrupt it.
+	 */
+	if (flags & KEXEC_FILE_ON_CRASH)
+		kimage_free(xchg(&kexec_crash_image, NULL));
+
+	ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+				     cmdline_len, flags);
+	if (ret)
+		goto out;
+
+	ret = machine_kexec_prepare(image);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		struct kexec_segment *ksegment;
+
+		ksegment = &image->segment[i];
+		pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+			 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+			 ksegment->memsz);
+
+		ret = kimage_load_segment(image, &image->segment[i]);
+		if (ret)
+			goto out;
+	}
+
+	kimage_terminate(image);
+
+	/*
+	 * Free up any temporary buffers allocated which are not needed
+	 * after image has been loaded
+	 */
+	kimage_file_post_load_cleanup(image);
+exchange:
+	image = xchg(dest_image, image);
+out:
+	mutex_unlock(&kexec_mutex);
+	kimage_free(image);
+	return ret;
 }
 
 void crash_kexec(struct pt_regs *regs)
@@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+static int __kexec_add_segment(struct kimage *image, char *buf,
+			       unsigned long bufsz, unsigned long mem,
+			       unsigned long memsz)
+{
+	struct kexec_segment *ksegment;
+
+	ksegment = &image->segment[image->nr_segments];
+	ksegment->kbuf = buf;
+	ksegment->bufsz = bufsz;
+	ksegment->mem = mem;
+	ksegment->memsz = memsz;
+	image->nr_segments++;
+
+	return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+				    struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_end = min(end, kbuf->buf_max);
+	temp_start = temp_end - kbuf->memsz;
+
+	do {
+		/* align down start */
+		temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+		if (temp_start < start || temp_start < kbuf->buf_min)
+			return 0;
+
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start - PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+				     struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_start = max(start, kbuf->buf_min);
+
+	do {
+		temp_start = ALIGN(temp_start, kbuf->buf_align);
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		if (temp_end > end || temp_end > kbuf->buf_max)
+			return 0;
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start + PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+	struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+	unsigned long sz = end - start + 1;
+
+	/* Returning 0 will take to next memory range */
+	if (sz < kbuf->memsz)
+		return 0;
+
+	if (end < kbuf->buf_min || start > kbuf->buf_max)
+		return 0;
+
+	/*
+	 * Allocate memory top down with-in ram range. Otherwise bottom up
+	 * allocation.
+	 */
+	if (kbuf->top_down)
+		return locate_mem_hole_top_down(start, end, kbuf);
+	return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+		     unsigned long memsz, unsigned long buf_align,
+		     unsigned long buf_min, unsigned long buf_max,
+		     bool top_down, unsigned long *load_addr)
+{
+
+	struct kexec_segment *ksegment;
+	struct kexec_buf buf, *kbuf;
+	int ret;
+
+	/* Currently adding segment this way is allowed only in file mode */
+	if (!image->file_mode)
+		return -EINVAL;
+
+	if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+		return -EINVAL;
+
+	/*
+	 * Make sure we are not trying to add buffer after allocating
+	 * control pages. All segments need to be placed first before
+	 * any control pages are allocated. As control page allocation
+	 * logic goes through list of segments to make sure there are
+	 * no destination overlaps.
+	 */
+	if (!list_empty(&image->control_pages)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	memset(&buf, 0, sizeof(struct kexec_buf));
+	kbuf = &buf;
+	kbuf->image = image;
+	kbuf->buffer = buffer;
+	kbuf->bufsz = bufsz;
+
+	kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+	kbuf->buf_align = max(buf_align, PAGE_SIZE);
+	kbuf->buf_min = buf_min;
+	kbuf->buf_max = buf_max;
+	kbuf->top_down = top_down;
+
+	/* Walk the RAM ranges and allocate a suitable range for the buffer */
+	ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback);
+	if (ret != 1) {
+		/* A suitable memory range could not be found for buffer */
+		return -EADDRNOTAVAIL;
+	}
+
+	/* Found a suitable memory range */
+	ksegment = &image->segment[image->nr_segments - 1];
+	*load_addr = ksegment->mem;
+	return 0;
+}
+
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
-- 
cgit v1.2.3


From a4e610b5e655186f81f18f027d2b7a15e010cbef Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Sun, 10 Aug 2014 04:10:23 +1000
Subject: drm/nouveau: use ioctl interface for abi16 grobj alloc

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 77 +++++++++++++++++++--------------
 drivers/gpu/drm/nouveau/nouveau_drm.h   |  3 +-
 include/uapi/drm/nouveau_drm.h          |  8 ++++
 3 files changed, 54 insertions(+), 34 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 22c1eff58b40..ec15c97ec841 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -21,7 +21,9 @@
  *
  */
 
-#include <nvif/os.h>
+#include <nvif/client.h>
+#include <nvif/driver.h>
+#include <nvif/ioctl.h>
 #include <nvif/class.h>
 
 #include "nouveau_drm.h"
@@ -46,7 +48,7 @@ nouveau_abi16_get(struct drm_file *file_priv, struct drm_device *dev)
 			 * opened)
 			 */
 			if (nvif_device_init(&cli->base.base, NULL,
-					     NVDRM_DEVICE, NV_DEVICE_CLASS,
+					     NOUVEAU_ABI16_DEVICE, NV_DEVICE,
 					     &(struct nv_device_class) {
 						.device = ~0ULL,
 					     }, sizeof(struct nv_device_class),
@@ -280,7 +282,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	abi16->handles |= (1ULL << init->channel);
 
 	/* create channel object and initialise dma and fence management */
-	ret = nouveau_channel_new(drm, device, NVDRM_CHAN | init->channel,
+	ret = nouveau_channel_new(drm, device,
+				  NOUVEAU_ABI16_CHAN(init->channel),
 				  init->fb_ctxdma_handle,
 				  init->tt_ctxdma_handle, &chan->chan);
 	if (ret)
@@ -330,6 +333,18 @@ done:
 	return nouveau_abi16_put(abi16, ret);
 }
 
+static struct nouveau_abi16_chan *
+nouveau_abi16_chan(struct nouveau_abi16 *abi16, int channel)
+{
+	struct nouveau_abi16_chan *chan;
+
+	list_for_each_entry(chan, &abi16->channels, head) {
+		if (chan->chan->object->handle == NOUVEAU_ABI16_CHAN(channel))
+			return chan;
+	}
+
+	return NULL;
+}
 
 int
 nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS)
@@ -337,28 +352,38 @@ nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS)
 	struct drm_nouveau_channel_free *req = data;
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
 	struct nouveau_abi16_chan *chan;
-	int ret = -ENOENT;
 
 	if (unlikely(!abi16))
 		return -ENOMEM;
 
-	list_for_each_entry(chan, &abi16->channels, head) {
-		if (chan->chan->object->handle == (NVDRM_CHAN | req->channel)) {
-			nouveau_abi16_chan_fini(abi16, chan);
-			return nouveau_abi16_put(abi16, 0);
-		}
-	}
-
-	return nouveau_abi16_put(abi16, ret);
+	chan = nouveau_abi16_chan(abi16, req->channel);
+	if (!chan)
+		return nouveau_abi16_put(abi16, -ENOENT);
+	nouveau_abi16_chan_fini(abi16, chan);
+	return nouveau_abi16_put(abi16, 0);
 }
 
 int
 nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_grobj_alloc *init = data;
+	struct {
+		struct nvif_ioctl_v0 ioctl;
+		struct nvif_ioctl_new_v0 new;
+	} args = {
+		.ioctl.owner = NVIF_IOCTL_V0_OWNER_ANY,
+		.ioctl.type = NVIF_IOCTL_V0_NEW,
+		.ioctl.path_nr = 3,
+		.ioctl.path[2] = NOUVEAU_ABI16_CLIENT,
+		.ioctl.path[1] = NOUVEAU_ABI16_DEVICE,
+		.ioctl.path[0] = NOUVEAU_ABI16_CHAN(init->channel),
+		.new.route = NVDRM_OBJECT_ABI16,
+		.new.handle = init->handle,
+		.new.oclass = init->class,
+	};
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_object *object;
+	struct nvif_client *client;
 	int ret;
 
 	if (unlikely(!abi16))
@@ -366,6 +391,7 @@ nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS)
 
 	if (init->handle == ~0)
 		return nouveau_abi16_put(abi16, -EINVAL);
+	client = nvif_client(nvif_object(&abi16->device));
 
 	/* compatibility with userspace that assumes 506e for all chipsets */
 	if (init->class == 0x506e) {
@@ -374,10 +400,7 @@ nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS)
 			return nouveau_abi16_put(abi16, 0);
 	}
 
-	/*XXX*/
-	ret = nouveau_object_new(nv_object(nvkm_client(&abi16->device.base)),
-				 NVDRM_CHAN | init->channel, init->handle,
-				 init->class, NULL, 0, &object);
+	ret = nvif_client_ioctl(client, &args, sizeof(args));
 	return nouveau_abi16_put(abi16, ret);
 }
 
@@ -387,7 +410,7 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS)
 	struct drm_nouveau_notifierobj_alloc *info = data;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
-	struct nouveau_abi16_chan *chan = NULL, *temp;
+	struct nouveau_abi16_chan *chan;
 	struct nouveau_abi16_ntfy *ntfy;
 	struct nvif_device *device = &abi16->device;
 	struct nouveau_object *object;
@@ -401,13 +424,7 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS)
 	if (unlikely(device->info.family >= NV_DEVICE_INFO_V0_FERMI))
 		return nouveau_abi16_put(abi16, -EINVAL);
 
-	list_for_each_entry(temp, &abi16->channels, head) {
-		if (temp->chan->object->handle == (NVDRM_CHAN | info->channel)) {
-			chan = temp;
-			break;
-		}
-	}
-
+	chan = nouveau_abi16_chan(abi16, info->channel);
 	if (!chan)
 		return nouveau_abi16_put(abi16, -ENOENT);
 
@@ -461,20 +478,14 @@ nouveau_abi16_ioctl_gpuobj_free(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_gpuobj_free *fini = data;
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
-	struct nouveau_abi16_chan *chan = NULL, *temp;
+	struct nouveau_abi16_chan *chan;
 	struct nouveau_abi16_ntfy *ntfy;
 	int ret;
 
 	if (unlikely(!abi16))
 		return -ENOMEM;
 
-	list_for_each_entry(temp, &abi16->channels, head) {
-		if (temp->chan->object->handle == (NVDRM_CHAN | fini->channel)) {
-			chan = temp;
-			break;
-		}
-	}
-
+	chan = nouveau_abi16_chan(abi16, fini->channel);
 	if (!chan)
 		return nouveau_abi16_put(abi16, -ENOENT);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 1fe401a898da..5479013e13f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -31,7 +31,6 @@
 #include <core/class.h>
 
 #include <drmP.h>
-#include <drm/nouveau_drm.h>
 
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
@@ -40,6 +39,8 @@
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_page_alloc.h>
 
+#include "uapi/drm/nouveau_drm.h"
+
 struct nouveau_channel;
 struct platform_device;
 
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 2a5769fdf8ba..ed0b7bd4b473 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -25,6 +25,14 @@
 #ifndef __NOUVEAU_DRM_H__
 #define __NOUVEAU_DRM_H__
 
+/* reserved object handles when using deprecated object APIs - these
+ * are here so that libdrm can allow interoperability with the new
+ * object APIs
+ */
+#define NOUVEAU_ABI16_CLIENT   0xffffffff
+#define NOUVEAU_ABI16_DEVICE   0xdddddddd
+#define NOUVEAU_ABI16_CHAN(n) (0xcccc0000 | (n))
+
 #define NOUVEAU_GEM_DOMAIN_CPU       (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
-- 
cgit v1.2.3


From 27111a23d01c1dba3180c998629004ab4c9ac985 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Sun, 10 Aug 2014 04:10:31 +1000
Subject: drm/nouveau: expose the full object/event interfaces to userspace

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/Makefile       |   2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c  |  25 ++-
 drivers/gpu/drm/nouveau/nouveau_drm.h  |   9 +-
 drivers/gpu/drm/nouveau/nouveau_nvif.c |   3 +
 drivers/gpu/drm/nouveau/nouveau_usif.c | 384 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_usif.h |   9 +
 drivers/gpu/drm/nouveau/nvif/client.c  |   1 +
 drivers/gpu/drm/nouveau/nvif/driver.h  |   1 +
 include/uapi/drm/nouveau_drm.h         |   3 +
 9 files changed, 427 insertions(+), 10 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_usif.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_usif.h

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 1a44d952cc3a..f5d7f7ce4bc6 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -340,7 +340,7 @@ nouveau-y += nouveau_drm.o nouveau_chan.o nouveau_dma.o nouveau_fence.o
 nouveau-y += nouveau_vga.o nouveau_agp.o
 nouveau-y += nouveau_ttm.o nouveau_sgdma.o nouveau_bo.o nouveau_gem.o
 nouveau-y += nouveau_prime.o nouveau_abi16.o
-nouveau-y += nouveau_nvif.o
+nouveau-y += nouveau_nvif.o nouveau_usif.o
 nouveau-y += nv04_fence.o nv10_fence.o nv17_fence.o
 nouveau-y += nv50_fence.o nv84_fence.o nvc0_fence.o
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 14fb8e86f5bc..606cc6b4922f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -50,6 +50,7 @@
 #include "nouveau_fbcon.h"
 #include "nouveau_fence.h"
 #include "nouveau_debugfs.h"
+#include "nouveau_usif.h"
 
 MODULE_PARM_DESC(config, "option string to pass to driver core");
 static char *nouveau_config;
@@ -107,8 +108,10 @@ nouveau_cli_create(u64 name, const char *sname,
 		int ret = nvif_client_init(NULL, NULL, sname, name,
 					   nouveau_config, nouveau_debug,
 					  &cli->base);
-		if (ret == 0)
+		if (ret == 0) {
 			mutex_init(&cli->mutex);
+			usif_client_init(cli);
+		}
 		return ret;
 	}
 	return -ENOMEM;
@@ -119,6 +122,7 @@ nouveau_cli_destroy(struct nouveau_cli *cli)
 {
 	nouveau_vm_ref(NULL, &nvkm_client(&cli->base)->vm, NULL);
 	nvif_client_fini(&cli->base);
+	usif_client_fini(cli);
 }
 
 static void
@@ -810,24 +814,31 @@ nouveau_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
-long nouveau_drm_ioctl(struct file *filp,
-		       unsigned int cmd, unsigned long arg)
+long
+nouveau_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct drm_file *file_priv = filp->private_data;
-	struct drm_device *dev;
+	struct drm_file *filp = file->private_data;
+	struct drm_device *dev = filp->minor->dev;
 	long ret;
-	dev = file_priv->minor->dev;
 
 	ret = pm_runtime_get_sync(dev->dev);
 	if (ret < 0 && ret != -EACCES)
 		return ret;
 
-	ret = drm_ioctl(filp, cmd, arg);
+	switch (_IOC_NR(cmd) - DRM_COMMAND_BASE) {
+	case DRM_NOUVEAU_NVIF:
+		ret = usif_ioctl(filp, (void __user *)arg, _IOC_SIZE(cmd));
+		break;
+	default:
+		ret = drm_ioctl(file, cmd, arg);
+		break;
+	}
 
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 	return ret;
 }
+
 static const struct file_operations
 nouveau_driver_fops = {
 	.owner = THIS_MODULE,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 3a6ef5018f52..b02b02452c85 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -9,8 +9,8 @@
 #define DRIVER_DATE		"20120801"
 
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		1
-#define DRIVER_PATCHLEVEL	2
+#define DRIVER_MINOR		2
+#define DRIVER_PATCHLEVEL	0
 
 /*
  * 1.1.1:
@@ -23,6 +23,9 @@
  *        bounds access to local memory to be silently ignored / return 0).
  * 1.1.2:
  *      - fixes multiple bugs in flip completion events and timestamping
+ * 1.2.0:
+ * 	- object api exposed to userspace
+ * 	- fermi,kepler,maxwell zbc
  */
 
 #include <nvif/client.h>
@@ -79,6 +82,8 @@ struct nouveau_cli {
 	struct list_head head;
 	struct mutex mutex;
 	void *abi16;
+	struct list_head objects;
+	struct list_head notifys;
 };
 
 static inline struct nouveau_cli *
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index c3838bffe24f..47ca88623753 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -37,6 +37,7 @@
 #include <nvif/ioctl.h>
 
 #include "nouveau_drm.h"
+#include "nouveau_usif.h"
 
 static void
 nvkm_client_unmap(void *priv, void *ptr, u32 size)
@@ -95,6 +96,8 @@ nvkm_client_ntfy(const void *header, u32 length, const void *data, u32 size)
 	switch (route) {
 	case NVDRM_NOTIFY_NVIF:
 		return nvif_notify(header, length, data, size);
+	case NVDRM_NOTIFY_USIF:
+		return usif_notify(header, length, data, size);
 	default:
 		WARN_ON(1);
 		break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
new file mode 100644
index 000000000000..cb1182d7e80e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+
+#include "nouveau_drm.h"
+#include "nouveau_usif.h"
+
+#include <nvif/notify.h>
+#include <nvif/unpack.h>
+#include <nvif/client.h>
+#include <nvif/event.h>
+#include <nvif/ioctl.h>
+
+struct usif_notify_p {
+	struct drm_pending_event base;
+	struct {
+		struct drm_event base;
+		u8 data[];
+	} e;
+};
+
+struct usif_notify {
+	struct list_head head;
+	atomic_t enabled;
+	u32 handle;
+	u16 reply;
+	u8  route;
+	u64 token;
+	struct usif_notify_p *p;
+};
+
+static inline struct usif_notify *
+usif_notify_find(struct drm_file *filp, u32 handle)
+{
+	struct nouveau_cli *cli = nouveau_cli(filp);
+	struct usif_notify *ntfy;
+	list_for_each_entry(ntfy, &cli->notifys, head) {
+		if (ntfy->handle == handle)
+			return ntfy;
+	}
+	return NULL;
+}
+
+static inline void
+usif_notify_dtor(struct usif_notify *ntfy)
+{
+	list_del(&ntfy->head);
+	kfree(ntfy);
+}
+
+int
+usif_notify(const void *header, u32 length, const void *data, u32 size)
+{
+	struct usif_notify *ntfy = NULL;
+	const union {
+		struct nvif_notify_rep_v0 v0;
+	} *rep = header;
+	struct drm_device *dev;
+	struct drm_file *filp;
+	unsigned long flags;
+
+	if (length == sizeof(rep->v0) && rep->v0.version == 0) {
+		if (WARN_ON(!(ntfy = (void *)(unsigned long)rep->v0.token)))
+			return NVIF_NOTIFY_DROP;
+		BUG_ON(rep->v0.route != NVDRM_NOTIFY_USIF);
+	} else
+	if (WARN_ON(1))
+		return NVIF_NOTIFY_DROP;
+
+	if (WARN_ON(!ntfy->p || ntfy->reply != (length + size)))
+		return NVIF_NOTIFY_DROP;
+	filp = ntfy->p->base.file_priv;
+	dev = filp->minor->dev;
+
+	memcpy(&ntfy->p->e.data[0], header, length);
+	memcpy(&ntfy->p->e.data[length], data, size);
+	switch (rep->v0.version) {
+	case 0: {
+		struct nvif_notify_rep_v0 *rep = (void *)ntfy->p->e.data;
+		rep->route = ntfy->route;
+		rep->token = ntfy->token;
+	}
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	if (!WARN_ON(filp->event_space < ntfy->p->e.base.length)) {
+		list_add_tail(&ntfy->p->base.link, &filp->event_list);
+		filp->event_space -= ntfy->p->e.base.length;
+	}
+	wake_up_interruptible(&filp->event_wait);
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+	atomic_set(&ntfy->enabled, 0);
+	return NVIF_NOTIFY_DROP;
+}
+
+static int
+usif_notify_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(f);
+	struct nvif_client *client = &cli->base;
+	union {
+		struct nvif_ioctl_ntfy_new_v0 v0;
+	} *args = data;
+	union {
+		struct nvif_notify_req_v0 v0;
+	} *req;
+	struct usif_notify *ntfy;
+	int ret;
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		if (usif_notify_find(f, args->v0.index))
+			return -EEXIST;
+	} else
+		return ret;
+	req = data;
+
+	if (!(ntfy = kmalloc(sizeof(*ntfy), GFP_KERNEL)))
+		return -ENOMEM;
+	atomic_set(&ntfy->enabled, 0);
+
+	if (nvif_unpack(req->v0, 0, 0, true)) {
+		ntfy->reply = sizeof(struct nvif_notify_rep_v0) + req->v0.reply;
+		ntfy->route = req->v0.route;
+		ntfy->token = req->v0.token;
+		req->v0.route = NVDRM_NOTIFY_USIF;
+		req->v0.token = (unsigned long)(void *)ntfy;
+		ret = nvif_client_ioctl(client, argv, argc);
+		req->v0.token = ntfy->token;
+		req->v0.route = ntfy->route;
+		ntfy->handle = args->v0.index;
+	}
+
+	if (ret == 0)
+		list_add(&ntfy->head, &cli->notifys);
+	if (ret)
+		kfree(ntfy);
+	return ret;
+}
+
+static int
+usif_notify_del(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(f);
+	struct nvif_client *client = &cli->base;
+	union {
+		struct nvif_ioctl_ntfy_del_v0 v0;
+	} *args = data;
+	struct usif_notify *ntfy;
+	int ret;
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		if (!(ntfy = usif_notify_find(f, args->v0.index)))
+			return -ENOENT;
+	} else
+		return ret;
+
+	ret = nvif_client_ioctl(client, argv, argc);
+	if (ret == 0)
+		usif_notify_dtor(ntfy);
+	return ret;
+}
+
+static int
+usif_notify_get(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(f);
+	struct nvif_client *client = &cli->base;
+	union {
+		struct nvif_ioctl_ntfy_del_v0 v0;
+	} *args = data;
+	struct usif_notify *ntfy;
+	int ret;
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		if (!(ntfy = usif_notify_find(f, args->v0.index)))
+			return -ENOENT;
+	} else
+		return ret;
+
+	if (atomic_xchg(&ntfy->enabled, 1))
+		return 0;
+
+	ntfy->p = kmalloc(sizeof(*ntfy->p) + ntfy->reply, GFP_KERNEL);
+	if (ret = -ENOMEM, !ntfy->p)
+		goto done;
+	ntfy->p->base.event = &ntfy->p->e.base;
+	ntfy->p->base.file_priv = f;
+	ntfy->p->base.pid = current->pid;
+	ntfy->p->base.destroy =(void(*)(struct drm_pending_event *))kfree;
+	ntfy->p->e.base.type = DRM_NOUVEAU_EVENT_NVIF;
+	ntfy->p->e.base.length = sizeof(ntfy->p->e.base) + ntfy->reply;
+
+	ret = nvif_client_ioctl(client, argv, argc);
+done:
+	if (ret) {
+		atomic_set(&ntfy->enabled, 0);
+		kfree(ntfy->p);
+	}
+	return ret;
+}
+
+static int
+usif_notify_put(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(f);
+	struct nvif_client *client = &cli->base;
+	union {
+		struct nvif_ioctl_ntfy_put_v0 v0;
+	} *args = data;
+	struct usif_notify *ntfy;
+	int ret;
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		if (!(ntfy = usif_notify_find(f, args->v0.index)))
+			return -ENOENT;
+	} else
+		return ret;
+
+	ret = nvif_client_ioctl(client, argv, argc);
+	if (ret == 0 && atomic_xchg(&ntfy->enabled, 0))
+		kfree(ntfy->p);
+	return ret;
+}
+
+struct usif_object {
+	struct list_head head;
+	struct list_head ntfy;
+	u8  route;
+	u64 token;
+};
+
+static void
+usif_object_dtor(struct usif_object *object)
+{
+	list_del(&object->head);
+	kfree(object);
+}
+
+static int
+usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(f);
+	struct nvif_client *client = &cli->base;
+	union {
+		struct nvif_ioctl_new_v0 v0;
+	} *args = data;
+	struct usif_object *object;
+	int ret;
+
+	if (!(object = kmalloc(sizeof(*object), GFP_KERNEL)))
+		return -ENOMEM;
+	list_add(&object->head, &cli->objects);
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		object->route = args->v0.route;
+		object->token = args->v0.token;
+		args->v0.route = NVDRM_OBJECT_USIF;
+		args->v0.token = (unsigned long)(void *)object;
+		ret = nvif_client_ioctl(client, argv, argc);
+		args->v0.token = object->token;
+		args->v0.route = object->route;
+	}
+
+	if (ret)
+		usif_object_dtor(object);
+	return ret;
+}
+
+int
+usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
+{
+	struct nouveau_cli *cli = nouveau_cli(filp);
+	struct nvif_client *client = &cli->base;
+	void *data = kmalloc(argc, GFP_KERNEL);
+	u32   size = argc;
+	union {
+		struct nvif_ioctl_v0 v0;
+	} *argv = data;
+	struct usif_object *object;
+	u8 owner;
+	int ret;
+
+	if (ret = -ENOMEM, !argv)
+		goto done;
+	if (ret = -EFAULT, copy_from_user(argv, user, size))
+		goto done;
+
+	if (nvif_unpack(argv->v0, 0, 0, true)) {
+		/* block access to objects not created via this interface */
+		owner = argv->v0.owner;
+		argv->v0.owner = NVDRM_OBJECT_USIF;
+	} else
+		goto done;
+
+	mutex_lock(&cli->mutex);
+	switch (argv->v0.type) {
+	case NVIF_IOCTL_V0_NEW:
+		/* ... except if we're creating children */
+		argv->v0.owner = NVIF_IOCTL_V0_OWNER_ANY;
+		ret = usif_object_new(filp, data, size, argv, argc);
+		break;
+	case NVIF_IOCTL_V0_NTFY_NEW:
+		ret = usif_notify_new(filp, data, size, argv, argc);
+		break;
+	case NVIF_IOCTL_V0_NTFY_DEL:
+		ret = usif_notify_del(filp, data, size, argv, argc);
+		break;
+	case NVIF_IOCTL_V0_NTFY_GET:
+		ret = usif_notify_get(filp, data, size, argv, argc);
+		break;
+	case NVIF_IOCTL_V0_NTFY_PUT:
+		ret = usif_notify_put(filp, data, size, argv, argc);
+		break;
+	default:
+		ret = nvif_client_ioctl(client, argv, argc);
+		break;
+	}
+	if (argv->v0.route == NVDRM_OBJECT_USIF) {
+		object = (void *)(unsigned long)argv->v0.token;
+		argv->v0.route = object->route;
+		argv->v0.token = object->token;
+		if (ret == 0 && argv->v0.type == NVIF_IOCTL_V0_DEL) {
+			list_del(&object->head);
+			kfree(object);
+		}
+	} else {
+		argv->v0.route = NVIF_IOCTL_V0_ROUTE_HIDDEN;
+		argv->v0.token = 0;
+	}
+	argv->v0.owner = owner;
+	mutex_unlock(&cli->mutex);
+
+	if (copy_to_user(user, argv, argc))
+		ret = -EFAULT;
+done:
+	kfree(argv);
+	return ret;
+}
+
+void
+usif_client_fini(struct nouveau_cli *cli)
+{
+	struct usif_object *object, *otemp;
+	struct usif_notify *notify, *ntemp;
+
+	list_for_each_entry_safe(notify, ntemp, &cli->notifys, head) {
+		usif_notify_dtor(notify);
+	}
+
+	list_for_each_entry_safe(object, otemp, &cli->objects, head) {
+		usif_object_dtor(object);
+	}
+}
+
+void
+usif_client_init(struct nouveau_cli *cli)
+{
+	INIT_LIST_HEAD(&cli->objects);
+	INIT_LIST_HEAD(&cli->notifys);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.h b/drivers/gpu/drm/nouveau/nouveau_usif.h
new file mode 100644
index 000000000000..c037e3ae8c70
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.h
@@ -0,0 +1,9 @@
+#ifndef __NOUVEAU_USIF_H__
+#define __NOUVEAU_USIF_H__
+
+void usif_client_init(struct nouveau_cli *);
+void usif_client_fini(struct nouveau_cli *);
+int  usif_ioctl(struct drm_file *, void __user *, u32);
+int  usif_notify(const void *, u32, const void *, u32);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c
index 20fb38688b93..3c4df1fc26dc 100644
--- a/drivers/gpu/drm/nouveau/nvif/client.c
+++ b/drivers/gpu/drm/nouveau/nvif/client.c
@@ -60,6 +60,7 @@ nvif_drivers[] = {
 #ifdef __KERNEL__
 	&nvif_driver_nvkm,
 #else
+	&nvif_driver_drm,
 	&nvif_driver_lib,
 #endif
 	NULL
diff --git a/drivers/gpu/drm/nouveau/nvif/driver.h b/drivers/gpu/drm/nouveau/nvif/driver.h
index ea5b1b880fb6..b72a8f0c2758 100644
--- a/drivers/gpu/drm/nouveau/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/nvif/driver.h
@@ -15,6 +15,7 @@ struct nvif_driver {
 };
 
 extern const struct nvif_driver nvif_driver_nvkm;
+extern const struct nvif_driver nvif_driver_drm;
 extern const struct nvif_driver nvif_driver_lib;
 
 #endif
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index ed0b7bd4b473..0d7608dc1a34 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -25,6 +25,8 @@
 #ifndef __NOUVEAU_DRM_H__
 #define __NOUVEAU_DRM_H__
 
+#define DRM_NOUVEAU_EVENT_NVIF                                       0x80000000
+
 /* reserved object handles when using deprecated object APIs - these
  * are here so that libdrm can allow interoperability with the new
  * object APIs
@@ -131,6 +133,7 @@ struct drm_nouveau_gem_cpu_fini {
 #define DRM_NOUVEAU_GROBJ_ALLOC        0x04 /* deprecated */
 #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x05 /* deprecated */
 #define DRM_NOUVEAU_GPUOBJ_FREE        0x06 /* deprecated */
+#define DRM_NOUVEAU_NVIF               0x07
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
 #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
-- 
cgit v1.2.3


From 0f29b46d49b0ca50536632c6a33986c3171f5ea1 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Fri, 8 Aug 2014 19:00:55 -0400
Subject: IB/mad: add new ioctl to ABI to support new registration options

Registrations options are specified through flags.  Definitions of flags will
be in subsequent patches.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 Documentation/infiniband/user_mad.txt   |  13 ++--
 drivers/infiniband/core/agent.c         |   4 +-
 drivers/infiniband/core/cm.c            |   5 +-
 drivers/infiniband/core/mad.c           |   4 +-
 drivers/infiniband/core/sa_query.c      |   2 +-
 drivers/infiniband/core/user_mad.c      | 120 +++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/mlx4/mad.c        |   2 +-
 drivers/infiniband/hw/mthca/mthca_mad.c |   2 +-
 drivers/infiniband/hw/qib/qib_mad.c     |   2 +-
 drivers/infiniband/ulp/srpt/ib_srpt.c   |   2 +-
 include/rdma/ib_mad.h                   |   7 +-
 include/uapi/rdma/ib_user_mad.h         |  39 +++++++++++
 12 files changed, 186 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 8a366959f5cc..7aca13a54a3a 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -26,6 +26,11 @@ Creating MAD agents
   ioctl.  Also, all agents registered through a file descriptor will
   be unregistered when the descriptor is closed.
 
+  2014 -- a new registration ioctl is now provided which allows additional
+       fields to be provided during registration.
+       Users of this registration call are implicitly setting the use of
+       pkey_index (see below).
+
 Receiving MADs
 
   MADs are received using read().  The receive side now supports
@@ -104,10 +109,10 @@ P_Key Index Handling
   The old ib_umad interface did not allow setting the P_Key index for
   MADs that are sent and did not provide a way for obtaining the P_Key
   index of received MADs.  A new layout for struct ib_user_mad_hdr
-  with a pkey_index member has been defined; however, to preserve
-  binary compatibility with older applications, this new layout will
-  not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
-  before a file descriptor is used for anything else.
+  with a pkey_index member has been defined; however, to preserve binary
+  compatibility with older applications, this new layout will not be used
+  unless one of IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl's
+  are called before a file descriptor is used for anything else.
 
   In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
   to 6, the new layout of struct ib_user_mad_hdr will be used by
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 8e32c5abd09d..f6d29614cb01 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 		port_priv->agent[0] = ib_register_mad_agent(device, port_num,
 							    IB_QPT_SMI, NULL, 0,
 							    &agent_send_handler,
-							    NULL, NULL);
+							    NULL, NULL, 0);
 		if (IS_ERR(port_priv->agent[0])) {
 			ret = PTR_ERR(port_priv->agent[0]);
 			goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
 						    IB_QPT_GSI, NULL, 0,
 						    &agent_send_handler,
-						    NULL, NULL);
+						    NULL, NULL, 0);
 	if (IS_ERR(port_priv->agent[1])) {
 		ret = PTR_ERR(port_priv->agent[1]);
 		goto error3;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b7..e28a494e2a3a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
 	struct cm_port *port;
 	struct ib_mad_reg_req reg_req = {
 		.mgmt_class = IB_MGMT_CLASS_CM,
-		.mgmt_class_version = IB_CM_CLASS_VERSION
+		.mgmt_class_version = IB_CM_CLASS_VERSION,
 	};
 	struct ib_port_modify port_modify = {
 		.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
 							0,
 							cm_send_handler,
 							cm_recv_handler,
-							port);
+							port,
+							0);
 		if (IS_ERR(port->mad_agent))
 			goto error2;
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f46995d4f54b..988bbda67952 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -198,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 rmpp_version,
 					   ib_mad_send_handler send_handler,
 					   ib_mad_recv_handler recv_handler,
-					   void *context)
+					   void *context,
+					   u32 registration_flags)
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -359,6 +360,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	mad_agent_priv->agent.context = context;
 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
 	mad_agent_priv->agent.port_num = port_num;
+	mad_agent_priv->agent.flags = registration_flags;
 	spin_lock_init(&mad_agent_priv->lock);
 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf541f55..c38f030f0dc9 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
 		sa_dev->port[i].agent =
 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
 					      NULL, 0, send_handler,
-					      recv_handler, sa_dev);
+					      recv_handler, sa_dev, 0);
 		if (IS_ERR(sa_dev->port[i].agent))
 			goto err;
 
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index ea90a3ebb9c2..11af1c61c135 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -647,6 +647,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 
 found:
 	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
 		req.mgmt_class         = ureq.mgmt_class;
 		req.mgmt_class_version = ureq.mgmt_class_version;
 		memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -667,7 +668,7 @@ found:
 				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
 				      ureq.mgmt_class ? &req : NULL,
 				      ureq.rmpp_version,
-				      send_handler, recv_handler, file);
+				      send_handler, recv_handler, file, 0);
 	if (IS_ERR(agent)) {
 		ret = PTR_ERR(agent);
 		agent = NULL;
@@ -705,6 +706,119 @@ out:
 	return ret;
 }
 
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+	struct ib_user_mad_reg_req2 ureq;
+	struct ib_mad_reg_req req;
+	struct ib_mad_agent *agent = NULL;
+	int agent_id;
+	int ret;
+
+	mutex_lock(&file->port->file_mutex);
+	mutex_lock(&file->mutex);
+
+	if (!file->port->ib_dev) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid device\n");
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid QPN %d specified\n",
+			   ureq.qpn);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+			   ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+		ret = -EINVAL;
+
+		if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+				(u32 __user *) (arg + offsetof(struct
+				ib_user_mad_reg_req2, flags))))
+			ret = -EFAULT;
+
+		goto out;
+	}
+
+	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+		if (!__get_agent(file, agent_id))
+			goto found;
+
+	dev_notice(file->port->dev,
+		   "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+		   IB_UMAD_MAX_AGENTS);
+	ret = -ENOMEM;
+	goto out;
+
+found:
+	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
+		req.mgmt_class         = ureq.mgmt_class;
+		req.mgmt_class_version = ureq.mgmt_class_version;
+		if (ureq.oui & 0xff000000) {
+			dev_notice(file->port->dev,
+				   "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+				   ureq.oui);
+			ret = -EINVAL;
+			goto out;
+		}
+		req.oui[2] =  ureq.oui & 0x0000ff;
+		req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+		req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+		memcpy(req.method_mask, ureq.method_mask,
+			sizeof(req.method_mask));
+	}
+
+	agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+				      ureq.mgmt_class ? &req : NULL,
+				      ureq.rmpp_version,
+				      send_handler, recv_handler, file,
+				      ureq.flags);
+	if (IS_ERR(agent)) {
+		ret = PTR_ERR(agent);
+		agent = NULL;
+		goto out;
+	}
+
+	if (put_user(agent_id,
+		     (u32 __user *)(arg +
+				offsetof(struct ib_user_mad_reg_req2, id)))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (!file->already_used) {
+		file->already_used = 1;
+		file->use_pkey_index = 1;
+	}
+
+	file->agent[agent_id] = agent;
+	ret = 0;
+
+out:
+	mutex_unlock(&file->mutex);
+
+	if (ret && agent)
+		ib_unregister_mad_agent(agent);
+
+	mutex_unlock(&file->port->file_mutex);
+
+	return ret;
+}
+
+
 static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
 {
 	struct ib_mad_agent *agent = NULL;
@@ -760,6 +874,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -776,6 +892,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
 	default:
 		return -ENOIOCTLCMD;
 	}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad0564acd..82a7dd87089b 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 				agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 							      q ? IB_QPT_GSI : IB_QPT_SMI,
 							      NULL, 0, send_handler,
-							      NULL, NULL);
+							      NULL, NULL, 0);
 				if (IS_ERR(agent)) {
 					ret = PTR_ERR(agent);
 					goto err;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f457fc55..8881fa376e06 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
 			agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
 						      NULL, 0, send_handler,
-						      NULL, NULL);
+						      NULL, NULL, 0);
 			if (IS_ERR(agent)) {
 				ret = PTR_ERR(agent);
 				goto err;
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e5740d..636be117b578 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
 		ibp = &dd->pport[p].ibport_data;
 		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
 					      NULL, 0, send_handler,
-					      NULL, NULL);
+					      NULL, NULL, 0);
 		if (IS_ERR(agent)) {
 			ret = PTR_ERR(agent);
 			goto err;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f2788b15..8a8311e35cbd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -563,7 +563,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
 							 &reg_req, 0,
 							 srpt_mad_send_handler,
 							 srpt_mad_recv_handler,
-							 sport);
+							 sport, 0);
 		if (IS_ERR(sport->mad_agent)) {
 			ret = PTR_ERR(sport->mad_agent);
 			sport->mad_agent = NULL;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 3d81b90cc315..876f497f8b0c 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -355,6 +355,7 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
  * @hi_tid: Access layer assigned transaction ID for this client.
  *   Unsolicited MADs sent by this client will have the upper 32-bits
  *   of their TID set to this value.
+ * @flags: registration flags
  * @port_num: Port number on which QP is registered
  * @rmpp_version: If set, indicates the RMPP version used by this agent.
  */
@@ -367,6 +368,7 @@ struct ib_mad_agent {
 	ib_mad_snoop_handler	snoop_handler;
 	void			*context;
 	u32			hi_tid;
+	u32			flags;
 	u8			port_num;
 	u8			rmpp_version;
 };
@@ -426,6 +428,7 @@ struct ib_mad_recv_wc {
  *   in the range from 0x30 to 0x4f. Otherwise not used.
  * @method_mask: The caller will receive unsolicited MADs for any method
  *   where @method_mask = 1.
+ *
  */
 struct ib_mad_reg_req {
 	u8	mgmt_class;
@@ -451,6 +454,7 @@ struct ib_mad_reg_req {
  * @recv_handler: The completion callback routine invoked for a received
  *   MAD.
  * @context: User specified context associated with the registration.
+ * @registration_flags: Registration flags to set for this agent
  */
 struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 port_num,
@@ -459,7 +463,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 rmpp_version,
 					   ib_mad_send_handler send_handler,
 					   ib_mad_recv_handler recv_handler,
-					   void *context);
+					   void *context,
+					   u32 registration_flags);
 
 enum ib_mad_snoop_flags {
 	/*IB_MAD_SNOOP_POSTED_SENDS	   = 1,*/
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index d6fce1cbdb90..c00b081dc10e 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -191,6 +191,42 @@ struct ib_user_mad_reg_req {
 	__u8	rmpp_version;
 };
 
+/**
+ * ib_user_mad_reg_req2 - MAD registration request
+ *
+ * @id                 - Set by the _kernel_; used by userspace to identify the
+ *                       registered agent in future requests.
+ * @qpn                - Queue pair number; must be 0 or 1.
+ * @mgmt_class         - Indicates which management class of MADs should be
+ *                       receive by the caller.  This field is only required if
+ *                       the user wishes to receive unsolicited MADs, otherwise
+ *                       it should be 0.
+ * @mgmt_class_version - Indicates which version of MADs for the given
+ *                       management class to receive.
+ * @res                - Ignored.
+ * @flags              - additional registration flags; Must be in the set of
+ *                       flags defined in IB_USER_MAD_REG_FLAGS_CAP
+ * @method_mask        - The caller wishes to receive unsolicited MADs for the
+ *                       methods whose bit(s) is(are) set.
+ * @oui                - Indicates IEEE OUI to use when mgmt_class is a vendor
+ *                       class in the range from 0x30 to 0x4f. Otherwise not
+ *                       used.
+ * @rmpp_version       - If set, indicates the RMPP version to use.
+ */
+#define IB_USER_MAD_REG_FLAGS_CAP (0)
+struct ib_user_mad_reg_req2 {
+	__u32	id;
+	__u32	qpn;
+	__u8	mgmt_class;
+	__u8	mgmt_class_version;
+	__u16   res;
+	__u32   flags;
+	__u64   method_mask[2];
+	__u32   oui;
+	__u8	rmpp_version;
+	__u8	reserved[3];
+};
+
 #define IB_IOCTL_MAGIC		0x1b
 
 #define IB_USER_MAD_REGISTER_AGENT	_IOWR(IB_IOCTL_MAGIC, 1, \
@@ -200,4 +236,7 @@ struct ib_user_mad_reg_req {
 
 #define IB_USER_MAD_ENABLE_PKEY		_IO(IB_IOCTL_MAGIC, 3)
 
+#define IB_USER_MAD_REGISTER_AGENT2     _IOWR(IB_IOCTL_MAGIC, 4, \
+					      struct ib_user_mad_reg_req2)
+
 #endif /* IB_USER_MAD_H */
-- 
cgit v1.2.3


From 1471cb6ca67990a306500e69e52ffb28c93ccbbc Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Fri, 8 Aug 2014 19:00:56 -0400
Subject: IB/mad: Add user space RMPP support

Using the new registration mechanism, define a flag that indicates the
user wishes to process RMPP messages in user space rather than have
the kernel process them.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/mad.c      | 74 ++++++++++++++++++++++++++------------
 drivers/infiniband/core/user_mad.c | 34 +++++++++++-------
 include/rdma/ib_mad.h              | 11 ++++++
 include/uapi/rdma/ib_user_mad.h    |  5 ++-
 4 files changed, 88 insertions(+), 36 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 988bbda67952..74c30f4c557e 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -283,6 +283,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 				goto error1;
 			}
 		}
+
 		/* Make sure class supplied is consistent with QP type */
 		if (qp_type == IB_QPT_SMI) {
 			if ((mad_reg_req->mgmt_class !=
@@ -309,6 +310,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		/* No registration request supplied */
 		if (!send_handler)
 			goto error1;
+		if (registration_flags & IB_MAD_USER_RMPP)
+			goto error1;
 	}
 
 	/* Validate device and port */
@@ -907,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
 	return 0;
 }
 
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
 					    int rmpp_active,
@@ -923,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 	pad = get_pad_size(hdr_len, data_len);
 	message_size = hdr_len + data_len + pad;
 
-	if ((!mad_agent->rmpp_version &&
-	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
-	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
-		return ERR_PTR(-EINVAL);
+	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+		if (!rmpp_active && message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
+	} else
+		if (rmpp_active || message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
 
 	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
@@ -1180,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 			      &mad_agent_priv->send_list);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
-		if (mad_agent_priv->agent.rmpp_version) {
+		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 			ret = ib_send_rmpp_mad(mad_send_wr);
 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
 				ret = ib_send_mad(mad_send_wr);
@@ -1730,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
 
 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
 	return !mad_agent_priv->agent.rmpp_version ||
+		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1857,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
 						      mad_recv_wc);
 		if (!mad_recv_wc) {
@@ -1872,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-			ib_free_recv_mad(mad_recv_wc);
-			deref_mad_agent(mad_agent_priv);
-			return;
-		}
-		ib_mark_mad_done(mad_send_wr);
-		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
+				/* user rmpp is in effect
+				 * and this is an active RMPP MAD
+				 */
+				mad_recv_wc->wc->wr_id = 0;
+				mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+								   mad_recv_wc);
+				atomic_dec(&mad_agent_priv->refcount);
+			} else {
+				/* not user rmpp, revert to normal behavior and
+				 * drop the mad */
+				ib_free_recv_mad(mad_recv_wc);
+				deref_mad_agent(mad_agent_priv);
+				return;
+			}
+		} else {
+			ib_mark_mad_done(mad_send_wr);
+			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
-		/* Defined behavior is to complete response before request */
-		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-						   mad_recv_wc);
-		atomic_dec(&mad_agent_priv->refcount);
+			/* Defined behavior is to complete response before request */
+			mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+			mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+							   mad_recv_wc);
+			atomic_dec(&mad_agent_priv->refcount);
 
-		mad_send_wc.status = IB_WC_SUCCESS;
-		mad_send_wc.vendor_err = 0;
-		mad_send_wc.send_buf = &mad_send_wr->send_buf;
-		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+			mad_send_wc.status = IB_WC_SUCCESS;
+			mad_send_wc.vendor_err = 0;
+			mad_send_wc.send_buf = &mad_send_wr->send_buf;
+			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+		}
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
@@ -2128,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 
 	mad_agent_priv = mad_send_wr->mad_agent_priv;
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
 		if (ret == IB_RMPP_RESULT_CONSUMED)
 			goto done;
@@ -2524,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 
 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 
-	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
 		case IB_RMPP_RESULT_UNHANDLED:
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 11af1c61c135..928cdd20e2d1 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -506,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 
 	rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
 	hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
-	if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
-		copy_offset = IB_MGMT_MAD_HDR;
-		rmpp_active = 0;
-	} else {
+
+	if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	    && ib_mad_kernel_rmpp_agent(agent)) {
 		copy_offset = IB_MGMT_RMPP_HDR;
 		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
-			      IB_MGMT_RMPP_FLAG_ACTIVE;
+						IB_MGMT_RMPP_FLAG_ACTIVE;
+	} else {
+		copy_offset = IB_MGMT_MAD_HDR;
+		rmpp_active = 0;
 	}
 
 	data_len = count - hdr_size(file) - hdr_len;
@@ -558,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		rmpp_mad->mad_hdr.tid = *tid;
 	}
 
-	spin_lock_irq(&file->send_lock);
-	ret = is_duplicate(file, packet);
-	if (!ret)
+	if (!ib_mad_kernel_rmpp_agent(agent)
+	   && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	   && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+		spin_lock_irq(&file->send_lock);
 		list_add_tail(&packet->list, &file->send_list);
-	spin_unlock_irq(&file->send_lock);
-	if (ret) {
-		ret = -EINVAL;
-		goto err_msg;
+		spin_unlock_irq(&file->send_lock);
+	} else {
+		spin_lock_irq(&file->send_lock);
+		ret = is_duplicate(file, packet);
+		if (!ret)
+			list_add_tail(&packet->list, &file->send_list);
+		spin_unlock_irq(&file->send_lock);
+		if (ret) {
+			ret = -EINVAL;
+			goto err_msg;
+		}
 	}
 
 	ret = ib_post_send_mad(packet->msg, NULL);
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 876f497f8b0c..9bb99e983f58 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -40,6 +40,7 @@
 #include <linux/list.h>
 
 #include <rdma/ib_verbs.h>
+#include <uapi/rdma/ib_user_mad.h>
 
 /* Management base version */
 #define IB_MGMT_BASE_VERSION			1
@@ -359,6 +360,9 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
  * @port_num: Port number on which QP is registered
  * @rmpp_version: If set, indicates the RMPP version used by this agent.
  */
+enum {
+	IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP,
+};
 struct ib_mad_agent {
 	struct ib_device	*device;
 	struct ib_qp		*qp;
@@ -666,4 +670,11 @@ void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
  */
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
 
+/**
+ * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP.
+ * @agent: the agent in question
+ * @return: true if agent is performing rmpp, false otherwise.
+ */
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent);
+
 #endif /* IB_MAD_H */
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index c00b081dc10e..09f809f323ea 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -213,7 +213,10 @@ struct ib_user_mad_reg_req {
  *                       used.
  * @rmpp_version       - If set, indicates the RMPP version to use.
  */
-#define IB_USER_MAD_REG_FLAGS_CAP (0)
+enum {
+	IB_USER_MAD_USER_RMPP = (1 << 0),
+};
+#define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP)
 struct ib_user_mad_reg_req2 {
 	__u32	id;
 	__u32	qpn;
-- 
cgit v1.2.3


From f72a113a71ab08c4df8a5f80ab2f8a140feb81f6 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 7 Aug 2014 09:36:00 +0200
Subject: drm/radeon: add userptr support v8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds an IOCTL for turning a pointer supplied by
userspace into a buffer object.

It imposes several restrictions upon the memory being mapped:

1. It must be page aligned (both start/end addresses, i.e ptr and size).

2. It must be normal system memory, not a pointer into another map of IO
space (e.g. it must not be a GTT mmapping of another object).

3. The BO is mapped into GTT, so the maximum amount of memory mapped at
all times is still the GTT limit.

4. The BO is only mapped readonly for now, so no write support.

5. List of backing pages is only acquired once, so they represent a
snapshot of the first use.

Exporting and sharing as well as mapping of buffer objects created by
this function is forbidden and results in an -EPERM.

v2: squash all previous changes into first public version
v3: fix tabs, map readonly, don't use MM callback any more
v4: set TTM_PAGE_FLAG_SG so that TTM never messes with the pages,
    pin/unpin pages on bind/unbind instead of populate/unpopulate
v5: rebased on 3.17-wip, IOCTL renamed to userptr, reject any unknown
    flags, better handle READONLY flag, improve permission check
v6: fix ptr cast warning, use set_page_dirty/mark_page_accessed on unpin
v7: add warning about it's availability in the API definition
v8: drop access_ok check, fix VM mapping bits

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v4)
Reviewed-by: Jérôme Glisse <jglisse@redhat.com> (v4)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |   6 ++
 drivers/gpu/drm/radeon/radeon_cs.c     |  25 +++++-
 drivers/gpu/drm/radeon/radeon_drv.c    |   5 +-
 drivers/gpu/drm/radeon/radeon_gem.c    |  68 ++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c    |   1 +
 drivers/gpu/drm/radeon/radeon_object.c |   3 +
 drivers/gpu/drm/radeon/radeon_prime.c  |  10 +++
 drivers/gpu/drm/radeon/radeon_ttm.c    | 145 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_vm.c     |   3 +
 include/uapi/drm/radeon_drm.h          |  16 ++++
 10 files changed, 279 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9e1732eb402c..6f38a23a5810 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2138,6 +2138,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp);
+int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp);
 int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
@@ -2871,6 +2873,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
+extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
+				     uint32_t flags);
+extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm);
+extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm);
 extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
 extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index ee712c199b25..1321491cf499 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -78,7 +78,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	struct radeon_cs_chunk *chunk;
 	struct radeon_cs_buckets buckets;
 	unsigned i, j;
-	bool duplicate;
+	bool duplicate, need_mmap_lock = false;
+	int r;
 
 	if (p->chunk_relocs_idx == -1) {
 		return 0;
@@ -164,6 +165,19 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].allowed_domains = domain;
 		}
 
+		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
+			uint32_t domain = p->relocs[i].prefered_domains;
+			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
+				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
+					  "allowed for userptr BOs\n");
+				return -EINVAL;
+			}
+			need_mmap_lock = true;
+			domain = RADEON_GEM_DOMAIN_GTT;
+			p->relocs[i].prefered_domains = domain;
+			p->relocs[i].allowed_domains = domain;
+		}
+
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
 
@@ -176,8 +190,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->cs_flags & RADEON_CS_USE_VM)
 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
 					      &p->validated);
+	if (need_mmap_lock)
+		down_read(&current->mm->mmap_sem);
+
+	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 
-	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
+	if (need_mmap_lock)
+		up_read(&current->mm->mmap_sem);
+
+	return r;
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a773830c6c40..5b18af926527 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -114,6 +114,9 @@ int radeon_gem_object_open(struct drm_gem_object *obj,
 				struct drm_file *file_priv);
 void radeon_gem_object_close(struct drm_gem_object *obj,
 				struct drm_file *file_priv);
+struct dma_buf *radeon_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags);
 extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
 				      unsigned int flags,
 				      int *vpos, int *hpos, ktime_t *stime,
@@ -568,7 +571,7 @@ static struct drm_driver kms_driver = {
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = radeon_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_pin = radeon_gem_prime_pin,
 	.gem_prime_unpin = radeon_gem_prime_unpin,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index bfd7e1b0ff3f..993ab223b503 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -272,6 +272,65 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_gem_userptr *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *bo;
+	uint32_t handle;
+	int r;
+
+	if (offset_in_page(args->addr | args->size))
+		return -EINVAL;
+
+	/* we only support read only mappings for now */
+	if (!(args->flags & RADEON_GEM_USERPTR_READONLY))
+		return -EACCES;
+
+	/* reject unknown flag values */
+	if (args->flags & ~RADEON_GEM_USERPTR_READONLY)
+		return -EINVAL;
+
+	/* readonly pages not tested on older hardware */
+	if (rdev->family < CHIP_R600)
+		return -EINVAL;
+
+	down_read(&rdev->exclusive_lock);
+
+	/* create a gem object to contain this object in */
+	r = radeon_gem_object_create(rdev, args->size, 0,
+				     RADEON_GEM_DOMAIN_CPU, 0,
+				     false, &gobj);
+	if (r)
+		goto handle_lockup;
+
+	bo = gem_to_radeon_bo(gobj);
+	r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
+	if (r)
+		goto release_object;
+
+	r = drm_gem_handle_create(filp, gobj, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(gobj);
+	if (r)
+		goto handle_lockup;
+
+	args->handle = handle;
+	up_read(&rdev->exclusive_lock);
+	return 0;
+
+release_object:
+	drm_gem_object_unreference_unlocked(gobj);
+
+handle_lockup:
+	up_read(&rdev->exclusive_lock);
+	r = radeon_gem_handle_lockup(rdev, r);
+
+	return r;
+}
+
 int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
@@ -315,6 +374,10 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
+	if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) {
+		drm_gem_object_unreference_unlocked(gobj);
+		return -EPERM;
+	}
 	*offset_p = radeon_bo_mmap_offset(robj);
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -532,6 +595,11 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
+
+	r = -EPERM;
+	if (radeon_ttm_tt_has_userptr(robj->tbo.ttm))
+		goto out;
+
 	r = radeon_bo_reserve(robj, false);
 	if (unlikely(r))
 		goto out;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eb7164d07985..8309b11e674d 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 480c87d8edc5..c73c1e320585 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -264,6 +264,9 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 {
 	int r, i;
 
+	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
+		return -EPERM;
+
 	if (bo->pin_count) {
 		bo->pin_count++;
 		if (gpu_addr)
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index f7e48d329db3..bb18bc74b7d7 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -103,3 +103,13 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj)
 	radeon_bo_unpin(bo);
 	radeon_bo_unreserve(bo);
 }
+
+struct dma_buf *radeon_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags)
+{
+	struct radeon_bo *bo = gem_to_radeon_bo(gobj);
+	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
+		return ERR_PTR(-EPERM);
+	return drm_gem_prime_export(dev, gobj, flags);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 72afe82a95c9..b20933fa35c6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -39,6 +39,8 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/swiotlb.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "radeon_reg.h"
 #include "radeon.h"
@@ -515,8 +517,92 @@ struct radeon_ttm_tt {
 	struct ttm_dma_tt		ttm;
 	struct radeon_device		*rdev;
 	u64				offset;
+
+	uint64_t			userptr;
+	struct mm_struct		*usermm;
+	uint32_t			userflags;
 };
 
+/* prepare the sg table with the user pages */
+static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+	unsigned pinned = 0, nents;
+	int r;
+
+	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+	if (current->mm != gtt->usermm)
+		return -EPERM;
+
+	do {
+		unsigned num_pages = ttm->num_pages - pinned;
+		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
+		struct page **pages = ttm->pages + pinned;
+
+		r = get_user_pages(current, current->mm, userptr, num_pages,
+				   write, 0, pages, NULL);
+		if (r < 0)
+			goto release_pages;
+
+		pinned += r;
+
+	} while (pinned < ttm->num_pages);
+
+	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
+				      ttm->num_pages << PAGE_SHIFT,
+				      GFP_KERNEL);
+	if (r)
+		goto release_sg;
+
+	r = -ENOMEM;
+	nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+	if (nents != ttm->sg->nents)
+		goto release_sg;
+
+	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
+					 gtt->ttm.dma_address, ttm->num_pages);
+
+	return 0;
+
+release_sg:
+	kfree(ttm->sg);
+
+release_pages:
+	release_pages(ttm->pages, pinned, 0);
+	return r;
+}
+
+static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
+{
+	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+	struct scatterlist *sg;
+	int i;
+
+	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+	/* free the sg table and pages again */
+	dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+
+	for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY))
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+
+	sg_free_table(ttm->sg);
+}
+
 static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
@@ -525,6 +611,11 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 		RADEON_GART_PAGE_WRITE;
 	int r;
 
+	if (gtt->userptr) {
+		radeon_ttm_tt_pin_userptr(ttm);
+		flags &= ~RADEON_GART_PAGE_WRITE;
+	}
+
 	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -547,6 +638,10 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm)
 	struct radeon_ttm_tt *gtt = (void *)ttm;
 
 	radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages);
+
+	if (gtt->userptr)
+		radeon_ttm_tt_unpin_userptr(ttm);
+
 	return 0;
 }
 
@@ -603,6 +698,16 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
+	if (gtt->userptr) {
+		ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+		if (!ttm->sg)
+			return -ENOMEM;
+
+		ttm->page_flags |= TTM_PAGE_FLAG_SG;
+		ttm->state = tt_unbound;
+		return 0;
+	}
+
 	if (slave && ttm->sg) {
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address, ttm->num_pages);
@@ -652,6 +757,12 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	unsigned i;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
+	if (gtt->userptr) {
+		kfree(ttm->sg);
+		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
+		return;
+	}
+
 	if (slave)
 		return;
 
@@ -680,6 +791,40 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	ttm_pool_unpopulate(ttm);
 }
 
+int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
+			      uint32_t flags)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return -EINVAL;
+
+	gtt->userptr = addr;
+	gtt->usermm = current->mm;
+	gtt->userflags = flags;
+	return 0;
+}
+
+bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return false;
+
+	return !!gtt->userptr;
+}
+
+bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return false;
+
+	return !!(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+}
+
 static struct ttm_bo_driver radeon_bo_driver = {
 	.ttm_tt_create = &radeon_ttm_tt_create,
 	.ttm_tt_populate = &radeon_ttm_tt_populate,
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index ccae4d9dc3de..0e107c5650bf 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -888,6 +888,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
 	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
 	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
+	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
+		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;
+
 	if (mem) {
 		addr = mem->start << PAGE_SHIFT;
 		if (mem->mem_type != TTM_PL_SYSTEM) {
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 509b2d7a41b7..3a9f20930372 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -511,6 +511,7 @@ typedef struct {
 #define DRM_RADEON_GEM_BUSY		0x2a
 #define DRM_RADEON_GEM_VA		0x2b
 #define DRM_RADEON_GEM_OP		0x2c
+#define DRM_RADEON_GEM_USERPTR		0x2d
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -554,6 +555,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
 #define DRM_IOCTL_RADEON_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
+#define DRM_IOCTL_RADEON_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -808,6 +810,20 @@ struct drm_radeon_gem_create {
 	uint32_t	flags;
 };
 
+/*
+ * This is not a reliable API and you should expect it to fail for any
+ * number of reasons and have fallback path that do not use userptr to
+ * perform any operation.
+ */
+#define RADEON_GEM_USERPTR_READONLY	(1 << 0)
+
+struct drm_radeon_gem_userptr {
+	uint64_t		addr;
+	uint64_t		size;
+	uint32_t		flags;
+	uint32_t		handle;
+};
+
 #define RADEON_TILING_MACRO				0x1
 #define RADEON_TILING_MICRO				0x2
 #define RADEON_TILING_SWAP_16BIT			0x4
-- 
cgit v1.2.3


From ddd00e33e17a62c5f44377ab42e7562ccfae7bd1 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 7 Aug 2014 09:36:01 +0200
Subject: drm/radeon: add userptr flag to limit it to anonymous memory v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid problems with writeback by limiting userptr to anonymous memory.

v2: add commit and code comments

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_gem.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++++
 include/uapi/drm/radeon_drm.h       |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 993ab223b503..032736b429bf 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -290,7 +290,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		return -EACCES;
 
 	/* reject unknown flag values */
-	if (args->flags & ~RADEON_GEM_USERPTR_READONLY)
+	if (args->flags & ~(RADEON_GEM_USERPTR_READONLY |
+	    RADEON_GEM_USERPTR_ANONONLY))
 		return -EINVAL;
 
 	/* readonly pages not tested on older hardware */
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index b20933fa35c6..12e37b1ddc40 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -538,6 +538,16 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	if (current->mm != gtt->usermm)
 		return -EPERM;
 
+	if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) {
+		/* check that we only pin down anonymous memory
+		   to prevent problems with writeback */
+		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+		struct vm_area_struct *vma;
+		vma = find_vma(gtt->usermm, gtt->userptr);
+		if (!vma || vma->vm_file || vma->vm_end < end)
+			return -EPERM;
+	}
+
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 3a9f20930372..9720e1a36848 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -816,6 +816,7 @@ struct drm_radeon_gem_create {
  * perform any operation.
  */
 #define RADEON_GEM_USERPTR_READONLY	(1 << 0)
+#define RADEON_GEM_USERPTR_ANONONLY	(1 << 1)
 
 struct drm_radeon_gem_userptr {
 	uint64_t		addr;
-- 
cgit v1.2.3


From 2a84a4476d6e13de72472f6ca4338aed0a8269b8 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 7 Aug 2014 09:36:02 +0200
Subject: drm/radeon: add userptr flag to directly validate the BO to GTT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This way we test userptr availability at BO creation time instead of first use.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_gem.c | 18 +++++++++++++++++-
 include/uapi/drm/radeon_drm.h       |  1 +
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 032736b429bf..450656027aba 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -291,7 +291,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 	/* reject unknown flag values */
 	if (args->flags & ~(RADEON_GEM_USERPTR_READONLY |
-	    RADEON_GEM_USERPTR_ANONONLY))
+	    RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE))
 		return -EINVAL;
 
 	/* readonly pages not tested on older hardware */
@@ -312,6 +312,22 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	if (r)
 		goto release_object;
 
+	if (args->flags & RADEON_GEM_USERPTR_VALIDATE) {
+		down_read(&current->mm->mmap_sem);
+		r = radeon_bo_reserve(bo, true);
+		if (r) {
+			up_read(&current->mm->mmap_sem);
+			goto release_object;
+		}
+
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+		radeon_bo_unreserve(bo);
+		up_read(&current->mm->mmap_sem);
+		if (r)
+			goto release_object;
+	}
+
 	r = drm_gem_handle_create(filp, gobj, &handle);
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 9720e1a36848..5dc61c2d4c73 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -817,6 +817,7 @@ struct drm_radeon_gem_create {
  */
 #define RADEON_GEM_USERPTR_READONLY	(1 << 0)
 #define RADEON_GEM_USERPTR_ANONONLY	(1 << 1)
+#define RADEON_GEM_USERPTR_VALIDATE	(1 << 2)
 
 struct drm_radeon_gem_userptr {
 	uint64_t		addr;
-- 
cgit v1.2.3


From 341cb9e426fac32523427c80c67543a16be46605 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 7 Aug 2014 09:36:03 +0200
Subject: drm/radeon: add userptr flag to register MMU notifier v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whenever userspace mapping related to our userptr change
we wait for it to become idle and unmap it from GTT.

v2: rebased, fix mutex unlock in error path
v3: improve commit message

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/Kconfig                |   1 +
 drivers/gpu/drm/radeon/Makefile        |   2 +-
 drivers/gpu/drm/radeon/radeon.h        |  12 ++
 drivers/gpu/drm/radeon/radeon_device.c |   2 +
 drivers/gpu/drm/radeon/radeon_gem.c    |   9 +-
 drivers/gpu/drm/radeon/radeon_mn.c     | 272 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_object.c |   1 +
 include/uapi/drm/radeon_drm.h          |   1 +
 8 files changed, 298 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_mn.c

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b066bb3ca01a..358b6e8697e9 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -115,6 +115,7 @@ config DRM_RADEON
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
 	select INTERVAL_TREE
+	select MMU_NOTIFIER
 	help
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  are both PCI and AGP versions.  You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 0013ad0db9ef..c7fa1aeb8c3f 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
-	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o
+	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o
 
 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 6f38a23a5810..542da8208674 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -65,6 +65,7 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
+#include <linux/hashtable.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -487,6 +488,9 @@ struct radeon_bo {
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	pid_t				pid;
+
+	struct radeon_mn		*mn;
+	struct interval_tree_node	mn_it;
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
 
@@ -1725,6 +1729,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 			   struct radeon_ring *cpB);
 void radeon_test_syncing(struct radeon_device *rdev);
 
+/*
+ * MMU Notifier
+ */
+int radeon_mn_register(struct radeon_bo *bo, unsigned long addr);
+void radeon_mn_unregister(struct radeon_bo *bo);
 
 /*
  * Debugfs
@@ -2372,6 +2381,9 @@ struct radeon_device {
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 gart_pin_size;
+
+	struct mutex	mn_lock;
+	DECLARE_HASHTABLE(mn_hash, 7);
 };
 
 bool radeon_is_px(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c8ea050c8fa4..c58f84f3c6a5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1270,6 +1270,8 @@ int radeon_device_init(struct radeon_device *rdev,
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
+	mutex_init(&rdev->mn_lock);
+	hash_init(rdev->mn_hash);
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 450656027aba..2a6fbf101cf0 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -291,7 +291,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 	/* reject unknown flag values */
 	if (args->flags & ~(RADEON_GEM_USERPTR_READONLY |
-	    RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE))
+	    RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE |
+	    RADEON_GEM_USERPTR_REGISTER))
 		return -EINVAL;
 
 	/* readonly pages not tested on older hardware */
@@ -312,6 +313,12 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	if (r)
 		goto release_object;
 
+	if (args->flags & RADEON_GEM_USERPTR_REGISTER) {
+		r = radeon_mn_register(bo, args->addr);
+		if (r)
+			goto release_object;
+	}
+
 	if (args->flags & RADEON_GEM_USERPTR_VALIDATE) {
 		down_read(&current->mm->mmap_sem);
 		r = radeon_bo_reserve(bo, true);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
new file mode 100644
index 000000000000..0157bc2f11f8
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/mmu_notifier.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+
+struct radeon_mn {
+	/* constant after initialisation */
+	struct radeon_device	*rdev;
+	struct mm_struct	*mm;
+	struct mmu_notifier	mn;
+
+	/* only used on destruction */
+	struct work_struct	work;
+
+	/* protected by rdev->mn_lock */
+	struct hlist_node	node;
+
+	/* objects protected by lock */
+	struct mutex		lock;
+	struct rb_root		objects;
+};
+
+/**
+ * radeon_mn_destroy - destroy the rmn
+ *
+ * @work: previously sheduled work item
+ *
+ * Lazy destroys the notifier from a work item
+ */
+static void radeon_mn_destroy(struct work_struct *work)
+{
+	struct radeon_mn *rmn = container_of(work, struct radeon_mn, work);
+	struct radeon_device *rdev = rmn->rdev;
+	struct radeon_bo *bo, *next;
+
+	mutex_lock(&rdev->mn_lock);
+	mutex_lock(&rmn->lock);
+	hash_del(&rmn->node);
+	rbtree_postorder_for_each_entry_safe(bo, next, &rmn->objects, mn_it.rb) {
+		interval_tree_remove(&bo->mn_it, &rmn->objects);
+		bo->mn = NULL;
+	}
+	mutex_unlock(&rmn->lock);
+	mutex_unlock(&rdev->mn_lock);
+	mmu_notifier_unregister(&rmn->mn, rmn->mm);
+	kfree(rmn);
+}
+
+/**
+ * radeon_mn_release - callback to notify about mm destruction
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ *
+ * Shedule a work item to lazy destroy our notifier.
+ */
+static void radeon_mn_release(struct mmu_notifier *mn,
+			      struct mm_struct *mm)
+{
+	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+	INIT_WORK(&rmn->work, radeon_mn_destroy);
+	schedule_work(&rmn->work);
+}
+
+/**
+ * radeon_mn_invalidate_range_start - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We block for all BOs between start and end to be idle and
+ * unmap them by move them into system domain again.
+ */
+static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long start,
+					     unsigned long end)
+{
+	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct radeon_bo *bo;
+		int r;
+
+		bo = container_of(it, struct radeon_bo, mn_it);
+		it = interval_tree_iter_next(it, start, end);
+
+		r = radeon_bo_reserve(bo, true);
+		if (r) {
+			DRM_ERROR("(%d) failed to reserve user bo\n", r);
+			continue;
+		}
+
+		if (bo->tbo.sync_obj) {
+			r = radeon_fence_wait(bo->tbo.sync_obj, false);
+			if (r)
+				DRM_ERROR("(%d) failed to wait for user bo\n", r);
+		}
+
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (r)
+			DRM_ERROR("(%d) failed to validate user bo\n", r);
+
+		radeon_bo_unreserve(bo);
+	}
+	
+	mutex_unlock(&rmn->lock);
+}
+
+static const struct mmu_notifier_ops radeon_mn_ops = {
+	.release = radeon_mn_release,
+	.invalidate_range_start = radeon_mn_invalidate_range_start,
+};
+
+/**
+ * radeon_mn_get - create notifier context
+ *
+ * @rdev: radeon device pointer
+ *
+ * Creates a notifier context for current->mm.
+ */
+static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev)
+{
+	struct mm_struct *mm = current->mm;
+	struct radeon_mn *rmn;
+	int r;
+
+	down_write(&mm->mmap_sem);
+	mutex_lock(&rdev->mn_lock);
+
+	hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm)
+		if (rmn->mm == mm)
+			goto release_locks;
+
+	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
+	if (!rmn) {
+		rmn = ERR_PTR(-ENOMEM);
+		goto release_locks;
+	}
+
+	rmn->rdev = rdev;
+	rmn->mm = mm;
+	rmn->mn.ops = &radeon_mn_ops;
+	mutex_init(&rmn->lock);
+	rmn->objects = RB_ROOT;
+	
+	r = __mmu_notifier_register(&rmn->mn, mm);
+	if (r)
+		goto free_rmn;
+
+	hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm);
+
+release_locks:
+	mutex_unlock(&rdev->mn_lock);
+	up_write(&mm->mmap_sem);
+
+	return rmn;
+
+free_rmn:
+	mutex_unlock(&rdev->mn_lock);
+	up_write(&mm->mmap_sem);
+	kfree(rmn);
+
+	return ERR_PTR(r);
+}
+
+/**
+ * radeon_mn_register - register a BO for notifier updates
+ *
+ * @bo: radeon buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers an MMU notifier for the given BO at the specified address.
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
+{
+	unsigned long end = addr + radeon_bo_size(bo) - 1;
+	struct radeon_device *rdev = bo->rdev;
+	struct radeon_mn *rmn;
+	struct interval_tree_node *it;
+
+	rmn = radeon_mn_get(rdev);
+	if (IS_ERR(rmn))
+		return PTR_ERR(rmn);
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, addr, end);
+	if (it) {
+		mutex_unlock(&rmn->lock);
+		return -EEXIST;
+	}
+
+	bo->mn = rmn;
+	bo->mn_it.start = addr;
+	bo->mn_it.last = end;
+	interval_tree_insert(&bo->mn_it, &rmn->objects);
+
+	mutex_unlock(&rmn->lock);
+
+	return 0;
+}
+
+/**
+ * radeon_mn_unregister - unregister a BO for notifier updates
+ *
+ * @bo: radeon buffer object
+ *
+ * Remove any registration of MMU notifier updates from the buffer object.
+ */
+void radeon_mn_unregister(struct radeon_bo *bo)
+{
+	struct radeon_device *rdev = bo->rdev;
+	struct radeon_mn *rmn;
+
+	mutex_lock(&rdev->mn_lock);
+	rmn = bo->mn;
+	if (rmn == NULL) {
+		mutex_unlock(&rdev->mn_lock);
+		return;
+	}
+
+	mutex_lock(&rmn->lock);
+	interval_tree_remove(&bo->mn_it, &rmn->objects);
+	bo->mn = NULL;
+	mutex_unlock(&rmn->lock);
+	mutex_unlock(&rdev->mn_lock);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index c73c1e320585..287523807989 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -75,6 +75,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
 	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
+	radeon_mn_unregister(bo);
 
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 5dc61c2d4c73..c77495ffc44f 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -818,6 +818,7 @@ struct drm_radeon_gem_create {
 #define RADEON_GEM_USERPTR_READONLY	(1 << 0)
 #define RADEON_GEM_USERPTR_ANONONLY	(1 << 1)
 #define RADEON_GEM_USERPTR_VALIDATE	(1 << 2)
+#define RADEON_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_radeon_gem_userptr {
 	uint64_t		addr;
-- 
cgit v1.2.3


From db1044d458a287c18c4d413adc4ad12e92e253b5 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Tue, 12 Aug 2014 19:20:11 -0400
Subject: RDMA/uapi: Include socket.h in rdma_user_cm.h

added struct sockaddr_storage to rdma_user_cm.h without also adding an
include for linux/socket.h to make sure it is defined.  Systemtap
needs the header files to build standalone and cannot rely on other
files to pre-include other headers, so add linux/socket.h to the list
of includes in this file.

Fixes: ee7aed4528f ("RDMA/ucma: Support querying for AF_IB addresses")
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 include/uapi/rdma/rdma_user_cm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 99b80abf360a..3066718eb120 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -34,6 +34,7 @@
 #define RDMA_USER_CM_H
 
 #include <linux/types.h>
+#include <linux/socket.h>
 #include <linux/in6.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_user_sa.h>
-- 
cgit v1.2.3


From 503e6636b6f96056210062be703356f4253b6db9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 11 Aug 2014 14:24:47 +0100
Subject: asm-generic: add memfd_create system call to unistd.h

Commit 9183df25fe7b ("shm: add memfd_create() syscall") added a new
system call (memfd_create) but didn't update the asm-generic unistd
header.

This patch adds the new system call to the asm-generic version of
unistd.h so that it can be used by architectures such as arm64.

Cc: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/uapi/asm-generic/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f1afd607f043..11d11bc5c78f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -703,9 +703,11 @@ __SYSCALL(__NR_renameat2, sys_renameat2)
 __SYSCALL(__NR_seccomp, sys_seccomp)
 #define __NR_getrandom 278
 __SYSCALL(__NR_getrandom, sys_getrandom)
+#define __NR_memfd_create 279
+__SYSCALL(__NR_memfd_create, sys_memfd_create)
 
 #undef __NR_syscalls
-#define __NR_syscalls 279
+#define __NR_syscalls 280
 
 /*
  * All syscalls below here should go away really,
-- 
cgit v1.2.3


From 18c01ab30288d9d0a7d80b08b659531f37ed379d Mon Sep 17 00:00:00 2001
From: Rajesh Ghanekar <Rajesh_Ghanekar@symantec.com>
Date: Fri, 1 Aug 2014 22:17:30 -0400
Subject: nfsd: allow turning off nfsv3 readdir_plus

One of our customer's application only needs file names, not file
attributes. With directories having 10K+ inodes (assuming buffer cache
has directory blocks cached having file names, but inode cache is
limited and hence need eviction of older cached inodes), older inodes
are evicted periodically. So if they keep on doing readdir(2) from NSF
client on multiple directories, some directory's files are periodically
removed from inode cache and hence new readdir(2) on same directory
requires disk access to bring back inodes again to inode cache.

As READDIRPLUS request fetches attributes also, doing getattr on each
file on server, it causes unnecessary disk accesses. If READDIRPLUS on
NFS client is returned with -ENOTSUPP, NFS client uses READDIR request
which just gets the names of the files in a directory, not attributes,
hence avoiding disk accesses on server.

There's already a corresponding client-side mount option, but an export
option reduces the need for configuration across multiple clients.

This flag affects NFSv3 only.  If it turns out it's needed for NFSv4 as
well then we may have to figure out how to extend the behavior to NFSv4,
but it's not currently obvious how to do that.

Signed-off-by: Rajesh Ghanekar <rajesh_ghanekar@symantec.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c                 | 1 +
 fs/nfsd/nfs3proc.c               | 8 ++++++++
 include/uapi/linux/nfsd/export.h | 5 +++--
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 72ffd7cce3c3..30a739d896ff 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1145,6 +1145,7 @@ static struct flags {
 	{ NFSEXP_ALLSQUASH, {"all_squash", ""}},
 	{ NFSEXP_ASYNC, {"async", "sync"}},
 	{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
+	{ NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
 	{ NFSEXP_NOHIDE, {"nohide", ""}},
 	{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
 	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fc51f7f6b36d..12f2aab4f614 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -466,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 	resp->buflen = resp->count;
 	resp->rqstp = rqstp;
 	offset = argp->cookie;
+
+	nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
+		RETURN_STATUS(nfserr_notsupp);
+
 	nfserr = nfsd_readdir(rqstp, &resp->fh,
 				     &offset,
 				     &resp->common,
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index cf47c313794e..584b6ef3a5e8 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -28,7 +28,8 @@
 #define NFSEXP_ALLSQUASH	0x0008
 #define NFSEXP_ASYNC		0x0010
 #define NFSEXP_GATHERED_WRITES	0x0020
-/* 40 80 100 currently unused */
+#define NFSEXP_NOREADDIRPLUS    0x0040
+/* 80 100 currently unused */
 #define NFSEXP_NOHIDE		0x0200
 #define NFSEXP_NOSUBTREECHECK	0x0400
 #define	NFSEXP_NOAUTHNLM	0x0800		/* Don't authenticate NLM requests - just trust */
@@ -47,7 +48,7 @@
  */
 #define	NFSEXP_V4ROOT		0x10000
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS		0x17E3F
+#define NFSEXP_ALLFLAGS		0x1FE7F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3


From 701e1e789142042144c8cc10b8f6d1554e960144 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 15 Aug 2014 11:52:53 +0200
Subject: drm/radeon: properly document reloc priority mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of hard coding the value properly document
that this is an userspace interface.

No intended functional change.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_cs.c | 3 ++-
 include/uapi/drm/radeon_drm.h      | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index ee712c199b25..cb12df784d83 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -132,7 +132,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		 * the buffers used for read only, which doubles the range
 		 * to 0 to 31. 32 is reserved for the kernel driver.
 		 */
-		priority = (r->flags & 0xf) * 2 + !!r->write_domain;
+		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
+			   + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 509b2d7a41b7..fea6099608ef 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -944,6 +944,7 @@ struct drm_radeon_cs_chunk {
 };
 
 /* drm_radeon_cs_reloc.flags */
+#define RADEON_RELOC_PRIO_MASK		(0xf << 0)
 
 struct drm_radeon_cs_reloc {
 	uint32_t		handle;
-- 
cgit v1.2.3


From e91ded8db57472c20b59b2242b100764cc152a10 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 4 Aug 2014 04:50:41 -0400
Subject: uapi: netfilter_arp: use __u8 instead of u_int8_t

Similarly, the u_int8_t type is non-standard and not defined.  Change
it to use __u8 like the rest of the netfilter headers.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_arp/arpt_mangle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter_arp/arpt_mangle.h b/include/uapi/linux/netfilter_arp/arpt_mangle.h
index 250f502902bb..8c2b16a1f5a0 100644
--- a/include/uapi/linux/netfilter_arp/arpt_mangle.h
+++ b/include/uapi/linux/netfilter_arp/arpt_mangle.h
@@ -13,7 +13,7 @@ struct arpt_mangle
 	union {
 		struct in_addr tgt_ip;
 	} u_t;
-	u_int8_t flags;
+	__u8 flags;
 	int target;
 };
 
-- 
cgit v1.2.3


From 0fc87864879c46afe145e20ec09c9dba2328e3be Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 28 May 2014 09:38:21 -0300
Subject: [media] v4l: Add test pattern colour component controls

In many cases the test pattern has selectable values for each colour
component. Implement controls for raw bayer components. Additional controls
should be defined for colour components that are not covered by these
controls.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 34 ++++++++++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c         |  4 ++++
 include/uapi/linux/v4l2-controls.h           |  4 ++++
 3 files changed, 42 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 9f5ffd85560b..a7eb1bde8b92 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -4790,6 +4790,40 @@ interface and may change in the future.</para>
 	    conversion.
 	    </entry>
 	  </row>
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_RED</constant></entry>
+	    <entry>integer</entry>
+	  </row>
+	  <row>
+	    <entry spanname="descr">Test pattern red colour component.
+	    </entry>
+	  </row>
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_GREENR</constant></entry>
+	    <entry>integer</entry>
+	  </row>
+	  <row>
+	    <entry spanname="descr">Test pattern green (next to red)
+	    colour component.
+	    </entry>
+	  </row>
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_BLUE</constant></entry>
+	    <entry>integer</entry>
+	  </row>
+	  <row>
+	    <entry spanname="descr">Test pattern blue colour component.
+	    </entry>
+	  </row>
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_GREENB</constant></entry>
+	    <entry>integer</entry>
+	  </row>
+	  <row>
+	    <entry spanname="descr">Test pattern green (next to blue)
+	    colour component.
+	    </entry>
+	  </row>
 	  <row><entry></entry></row>
 	</tbody>
       </tgroup>
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index f030d6a9e044..35d1f3d5045b 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -859,6 +859,10 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_VBLANK:			return "Vertical Blanking";
 	case V4L2_CID_HBLANK:			return "Horizontal Blanking";
 	case V4L2_CID_ANALOGUE_GAIN:		return "Analogue Gain";
+	case V4L2_CID_TEST_PATTERN_RED:		return "Red Pixel Value";
+	case V4L2_CID_TEST_PATTERN_GREENR:	return "Green (Red) Pixel Value";
+	case V4L2_CID_TEST_PATTERN_BLUE:	return "Blue Pixel Value";
+	case V4L2_CID_TEST_PATTERN_GREENB:	return "Green (Blue) Pixel Value";
 
 	/* Image processing controls */
 	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index e946e43fb8d5..8b930210a4b9 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -865,6 +865,10 @@ enum v4l2_jpeg_chroma_subsampling {
 #define V4L2_CID_VBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 1)
 #define V4L2_CID_HBLANK				(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 2)
 #define V4L2_CID_ANALOGUE_GAIN			(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 3)
+#define V4L2_CID_TEST_PATTERN_RED		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 4)
+#define V4L2_CID_TEST_PATTERN_GREENR		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 5)
+#define V4L2_CID_TEST_PATTERN_BLUE		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 6)
+#define V4L2_CID_TEST_PATTERN_GREENB		(V4L2_CID_IMAGE_SOURCE_CLASS_BASE + 7)
 
 
 /* Image processing controls */
-- 
cgit v1.2.3


From a913d8742e275dd2d80726afac02311a0f49d161 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 26 May 2014 09:46:18 -0300
Subject: [media] smiapp: Add driver-specific test pattern menu item
 definitions

Add numeric definitions for menu items used in the smiapp driver's test
pattern menu.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/Kbuild   |  1 +
 include/uapi/linux/smiapp.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 include/uapi/linux/smiapp.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033f8b3f..4ec377d103c7 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -353,6 +353,7 @@ header-y += serio.h
 header-y += shm.h
 header-y += signal.h
 header-y += signalfd.h
+header-y += smiapp.h
 header-y += snmp.h
 header-y += sock_diag.h
 header-y += socket.h
diff --git a/include/uapi/linux/smiapp.h b/include/uapi/linux/smiapp.h
new file mode 100644
index 000000000000..53938f4412ee
--- /dev/null
+++ b/include/uapi/linux/smiapp.h
@@ -0,0 +1,29 @@
+/*
+ * include/uapi/linux/smiapp.h
+ *
+ * Generic driver for SMIA/SMIA++ compliant camera modules
+ *
+ * Copyright (C) 2014 Intel Corporation
+ * Contact: Sakari Ailus <sakari.ailus@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef __UAPI_LINUX_SMIAPP_H_
+#define __UAPI_LINUX_SMIAPP_H_
+
+#define V4L2_SMIAPP_TEST_PATTERN_MODE_DISABLED			0
+#define V4L2_SMIAPP_TEST_PATTERN_MODE_SOLID_COLOUR		1
+#define V4L2_SMIAPP_TEST_PATTERN_MODE_COLOUR_BARS		2
+#define V4L2_SMIAPP_TEST_PATTERN_MODE_COLOUR_BARS_GREY		3
+#define V4L2_SMIAPP_TEST_PATTERN_MODE_PN9			4
+
+#endif /* __UAPI_LINUX_SMIAPP_H_ */
-- 
cgit v1.2.3


From e2a093ff0dbfa4c5d99f25241cf33325e9691d91 Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Wed, 6 Aug 2014 13:52:49 +0200
Subject: netfilter: nft_meta: add pkttype support

Add pkttype support for ip, ipv6 and inet families of tables.

This allows you to fetch the meta packet type based on the link layer
information. The loopback traffic is a special case, the packet type
is guessed from the network layer header.

No special handling for bridge and arp since we're not going to see
such traffic in the loopback interface.

Joint work with Alvaro Neira Ayuso <alvaroneay@gmail.com>

Signed-off-by: Alvaro Neira Ayuso <alvaroneay@gmail.com>
Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_meta.c                 | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 801bdd1e56e3..98144cdd8986 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -571,6 +571,7 @@ enum nft_exthdr_attributes {
  * @NFT_META_L4PROTO: layer 4 protocol number
  * @NFT_META_BRI_IIFNAME: packet input bridge interface name
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
+ * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -592,6 +593,7 @@ enum nft_meta_keys {
 	NFT_META_L4PROTO,
 	NFT_META_BRI_IIFNAME,
 	NFT_META_BRI_OIFNAME,
+	NFT_META_PKTTYPE,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 852b178c6ae7..4f2862fc12c2 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,9 @@
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
@@ -124,6 +127,30 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 		dest->data[0] = skb->secmark;
 		break;
 #endif
+	case NFT_META_PKTTYPE:
+		if (skb->pkt_type != PACKET_LOOPBACK) {
+			dest->data[0] = skb->pkt_type;
+			break;
+		}
+
+		switch (pkt->ops->pf) {
+		case NFPROTO_IPV4:
+			if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+				dest->data[0] = PACKET_MULTICAST;
+			else
+				dest->data[0] = PACKET_BROADCAST;
+			break;
+		case NFPROTO_IPV6:
+			if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+				dest->data[0] = PACKET_MULTICAST;
+			else
+				dest->data[0] = PACKET_BROADCAST;
+			break;
+		default:
+			WARN_ON(1);
+			goto err;
+		}
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -195,6 +222,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 #ifdef CONFIG_NETWORK_SECMARK
 	case NFT_META_SECMARK:
 #endif
+	case NFT_META_PKTTYPE:
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From afc5be3079796b024823bad42dc5ebf716453575 Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Sun, 24 Aug 2014 14:08:36 +0200
Subject: netfilter: nft_meta: Add cpu attribute support

Add cpu support to meta expresion.

This allows you to match packets with cpu number.

Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 net/netfilter/nft_meta.c                 | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 98144cdd8986..c9b6f00a3fb7 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -572,6 +572,7 @@ enum nft_exthdr_attributes {
  * @NFT_META_BRI_IIFNAME: packet input bridge interface name
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
+ * @NFT_META_CPU: cpu id through smp_processor_id()
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -594,6 +595,7 @@ enum nft_meta_keys {
 	NFT_META_BRI_IIFNAME,
 	NFT_META_BRI_OIFNAME,
 	NFT_META_PKTTYPE,
+	NFT_META_CPU,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 4f2862fc12c2..843e099a962d 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -17,6 +17,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/smp.h>
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
@@ -151,6 +152,9 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 			goto err;
 		}
 		break;
+	case NFT_META_CPU:
+		dest->data[0] = smp_processor_id();
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -223,6 +227,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 	case NFT_META_SECMARK:
 #endif
 	case NFT_META_PKTTYPE:
+	case NFT_META_CPU:
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From fa71f32b5de2be1644ee671ddbe211d79be7847f Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Tue, 29 Jul 2014 17:14:21 +0200
Subject: HID: uhid: add ABI compatible UHID_GET_REPORT replacing UHID_FEATURE

The old hdev->hid_get_raw_report() was broken by design. It was never
clear what kind of HW request it should trigger. Benjamin fixed that with
the core HID cleanup, though we never really adjusted uhid.

Unfortunately, our old UHID_FEATURE command was modelled around the broken
hid_get_raw_report(). We converted it silently to the new GET_REPORT and
nothing broke. Make this explicit by renaming UHID_FEATURE to
UHID_GET_REPORT and UHID_FEATURE_ANSWER to UHID_GET_REPORT_REPLY.

Note that this is 100% ABI compatible to UHID_FEATURE. This is just a
rename. But we have to keep the old definitions around to not break API.

>From now on, UHID_GET_REPORT must trigger a GET_REPORT request on the
user-space hardware layer. All the ambiguity due to the weird "feature"
name should be gone now.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/uhid.c        | 28 ++++++++++++++--------------
 include/uapi/linux/uhid.h | 23 +++++++++++++++++++++--
 2 files changed, 35 insertions(+), 16 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 2d2025a027fe..8f5e46b1bb46 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -124,8 +124,8 @@ static int uhid_hid_parse(struct hid_device *hid)
 	return hid_parse_report(hid, uhid->rd_data, uhid->rd_size);
 }
 
-static int uhid_hid_get_raw(struct hid_device *hid, unsigned char rnum,
-			    __u8 *buf, size_t count, unsigned char rtype)
+static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum,
+			       __u8 *buf, size_t count, unsigned char rtype)
 {
 	struct uhid_device *uhid = hid->driver_data;
 	__u8 report_type;
@@ -133,7 +133,7 @@ static int uhid_hid_get_raw(struct hid_device *hid, unsigned char rnum,
 	unsigned long flags;
 	int ret;
 	size_t uninitialized_var(len);
-	struct uhid_feature_answer_req *req;
+	struct uhid_get_report_reply_req *req;
 
 	if (!uhid->running)
 		return -EIO;
@@ -163,10 +163,10 @@ static int uhid_hid_get_raw(struct hid_device *hid, unsigned char rnum,
 	}
 
 	spin_lock_irqsave(&uhid->qlock, flags);
-	ev->type = UHID_FEATURE;
-	ev->u.feature.id = ++uhid->report_id;
-	ev->u.feature.rnum = rnum;
-	ev->u.feature.rtype = report_type;
+	ev->type = UHID_GET_REPORT;
+	ev->u.get_report.id = ++uhid->report_id;
+	ev->u.get_report.rnum = rnum;
+	ev->u.get_report.rtype = report_type;
 
 	uhid->report_running = true;
 	uhid_queue(uhid, ev);
@@ -182,7 +182,7 @@ static int uhid_hid_get_raw(struct hid_device *hid, unsigned char rnum,
 		ret = -ERESTARTSYS;
 	} else {
 		spin_lock_irqsave(&uhid->qlock, flags);
-		req = &uhid->report_buf.u.feature_answer;
+		req = &uhid->report_buf.u.get_report_reply;
 
 		if (req->err) {
 			ret = -EIO;
@@ -253,7 +253,7 @@ static int uhid_raw_request(struct hid_device *hid, unsigned char reportnum,
 {
 	switch (reqtype) {
 	case HID_REQ_GET_REPORT:
-		return uhid_hid_get_raw(hid, reportnum, buf, len, rtype);
+		return uhid_hid_get_report(hid, reportnum, buf, len, rtype);
 	case HID_REQ_SET_REPORT:
 		/* TODO: implement proper SET_REPORT functionality */
 		return -ENOSYS;
@@ -487,8 +487,8 @@ static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
 	return 0;
 }
 
-static int uhid_dev_feature_answer(struct uhid_device *uhid,
-				   struct uhid_event *ev)
+static int uhid_dev_get_report_reply(struct uhid_device *uhid,
+				     struct uhid_event *ev)
 {
 	unsigned long flags;
 
@@ -498,7 +498,7 @@ static int uhid_dev_feature_answer(struct uhid_device *uhid,
 	spin_lock_irqsave(&uhid->qlock, flags);
 
 	/* id for old report; drop it silently */
-	if (uhid->report_id != ev->u.feature_answer.id)
+	if (uhid->report_id != ev->u.get_report_reply.id)
 		goto unlock;
 	if (!uhid->report_running)
 		goto unlock;
@@ -634,8 +634,8 @@ static ssize_t uhid_char_write(struct file *file, const char __user *buffer,
 	case UHID_INPUT2:
 		ret = uhid_dev_input2(uhid, &uhid->input_buf);
 		break;
-	case UHID_FEATURE_ANSWER:
-		ret = uhid_dev_feature_answer(uhid, &uhid->input_buf);
+	case UHID_GET_REPORT_REPLY:
+		ret = uhid_dev_get_report_reply(uhid, &uhid->input_buf);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h
index 1e3b09c191cd..0a08f2bbe642 100644
--- a/include/uapi/linux/uhid.h
+++ b/include/uapi/linux/uhid.h
@@ -33,8 +33,10 @@ enum uhid_event_type {
 	UHID_OUTPUT,
 	UHID_OUTPUT_EV,			/* obsolete! */
 	UHID_INPUT,
-	UHID_FEATURE,
-	UHID_FEATURE_ANSWER,
+	UHID_FEATURE,			/* obsolete! use UHID_GET_REPORT */
+	UHID_GET_REPORT = UHID_FEATURE,
+	UHID_FEATURE_ANSWER,		/* obsolete! use UHID_GET_REPORT_REPLY */
+	UHID_GET_REPORT_REPLY = UHID_FEATURE_ANSWER,
 	UHID_CREATE2,
 	UHID_INPUT2,
 };
@@ -98,12 +100,20 @@ struct uhid_output_ev_req {
 	__s32 value;
 } __attribute__((__packed__));
 
+/* Obsolete! Kernel uses ABI compatible UHID_GET_REPORT. */
 struct uhid_feature_req {
 	__u32 id;
 	__u8 rnum;
 	__u8 rtype;
 } __attribute__((__packed__));
 
+struct uhid_get_report_req {
+	__u32 id;
+	__u8 rnum;
+	__u8 rtype;
+} __attribute__((__packed__));
+
+/* Obsolete! Use ABI compatible UHID_GET_REPORT_REPLY. */
 struct uhid_feature_answer_req {
 	__u32 id;
 	__u16 err;
@@ -111,6 +121,13 @@ struct uhid_feature_answer_req {
 	__u8 data[UHID_DATA_MAX];
 } __attribute__((__packed__));
 
+struct uhid_get_report_reply_req {
+	__u32 id;
+	__u16 err;
+	__u16 size;
+	__u8 data[UHID_DATA_MAX];
+} __attribute__((__packed__));
+
 struct uhid_event {
 	__u32 type;
 
@@ -120,7 +137,9 @@ struct uhid_event {
 		struct uhid_output_req output;
 		struct uhid_output_ev_req output_ev;
 		struct uhid_feature_req feature;
+		struct uhid_get_report_req get_report;
 		struct uhid_feature_answer_req feature_answer;
+		struct uhid_get_report_reply_req get_report_reply;
 		struct uhid_create2_req create2;
 		struct uhid_input2_req input2;
 	} u;
-- 
cgit v1.2.3


From 50598e7055d0d8610732e7eb2c84cbc3bc7db294 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Tue, 29 Jul 2014 17:14:22 +0200
Subject: HID: uhid: keep legacy definitions at the bottom of uhid.h

Instead of inlining the legacy definitions into the main part of uhid.h,
keep them at the bottom now. This way, the API is much easier to read and
legacy requests can be looked up at a separate place.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/uapi/linux/uhid.h | 101 ++++++++++++++++++++++++++++------------------
 1 file changed, 62 insertions(+), 39 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h
index 0a08f2bbe642..116536eeae62 100644
--- a/include/uapi/linux/uhid.h
+++ b/include/uapi/linux/uhid.h
@@ -24,37 +24,21 @@
 #include <linux/hid.h>
 
 enum uhid_event_type {
-	UHID_CREATE,
+	__UHID_LEGACY_CREATE,
 	UHID_DESTROY,
 	UHID_START,
 	UHID_STOP,
 	UHID_OPEN,
 	UHID_CLOSE,
 	UHID_OUTPUT,
-	UHID_OUTPUT_EV,			/* obsolete! */
-	UHID_INPUT,
-	UHID_FEATURE,			/* obsolete! use UHID_GET_REPORT */
-	UHID_GET_REPORT = UHID_FEATURE,
-	UHID_FEATURE_ANSWER,		/* obsolete! use UHID_GET_REPORT_REPLY */
-	UHID_GET_REPORT_REPLY = UHID_FEATURE_ANSWER,
+	__UHID_LEGACY_OUTPUT_EV,
+	__UHID_LEGACY_INPUT,
+	UHID_GET_REPORT,
+	UHID_GET_REPORT_REPLY,
 	UHID_CREATE2,
 	UHID_INPUT2,
 };
 
-struct uhid_create_req {
-	__u8 name[128];
-	__u8 phys[64];
-	__u8 uniq[64];
-	__u8 __user *rd_data;
-	__u16 rd_size;
-
-	__u16 bus;
-	__u32 vendor;
-	__u32 product;
-	__u32 version;
-	__u32 country;
-} __attribute__((__packed__));
-
 struct uhid_create2_req {
 	__u8 name[128];
 	__u8 phys[64];
@@ -76,24 +60,67 @@ enum uhid_report_type {
 	UHID_INPUT_REPORT,
 };
 
-struct uhid_input_req {
+struct uhid_input2_req {
+	__u16 size;
+	__u8 data[UHID_DATA_MAX];
+} __attribute__((__packed__));
+
+struct uhid_output_req {
 	__u8 data[UHID_DATA_MAX];
 	__u16 size;
+	__u8 rtype;
 } __attribute__((__packed__));
 
-struct uhid_input2_req {
+struct uhid_get_report_req {
+	__u32 id;
+	__u8 rnum;
+	__u8 rtype;
+} __attribute__((__packed__));
+
+struct uhid_get_report_reply_req {
+	__u32 id;
+	__u16 err;
 	__u16 size;
 	__u8 data[UHID_DATA_MAX];
 } __attribute__((__packed__));
 
-struct uhid_output_req {
+/*
+ * Compat Layer
+ * All these commands and requests are obsolete. You should avoid using them in
+ * new code. We support them for backwards-compatibility, but you might not get
+ * access to new feature in case you use them.
+ */
+
+enum uhid_legacy_event_type {
+	UHID_CREATE			= __UHID_LEGACY_CREATE,
+	UHID_OUTPUT_EV			= __UHID_LEGACY_OUTPUT_EV,
+	UHID_INPUT			= __UHID_LEGACY_INPUT,
+	UHID_FEATURE			= UHID_GET_REPORT,
+	UHID_FEATURE_ANSWER		= UHID_GET_REPORT_REPLY,
+};
+
+/* Obsolete! Use UHID_CREATE2. */
+struct uhid_create_req {
+	__u8 name[128];
+	__u8 phys[64];
+	__u8 uniq[64];
+	__u8 __user *rd_data;
+	__u16 rd_size;
+
+	__u16 bus;
+	__u32 vendor;
+	__u32 product;
+	__u32 version;
+	__u32 country;
+} __attribute__((__packed__));
+
+/* Obsolete! Use UHID_INPUT2. */
+struct uhid_input_req {
 	__u8 data[UHID_DATA_MAX];
 	__u16 size;
-	__u8 rtype;
 } __attribute__((__packed__));
 
-/* Obsolete! Newer kernels will no longer send these events but instead convert
- * it into raw output reports via UHID_OUTPUT. */
+/* Obsolete! Kernel uses UHID_OUTPUT exclusively now. */
 struct uhid_output_ev_req {
 	__u16 type;
 	__u16 code;
@@ -107,12 +134,6 @@ struct uhid_feature_req {
 	__u8 rtype;
 } __attribute__((__packed__));
 
-struct uhid_get_report_req {
-	__u32 id;
-	__u8 rnum;
-	__u8 rtype;
-} __attribute__((__packed__));
-
 /* Obsolete! Use ABI compatible UHID_GET_REPORT_REPLY. */
 struct uhid_feature_answer_req {
 	__u32 id;
@@ -121,12 +142,14 @@ struct uhid_feature_answer_req {
 	__u8 data[UHID_DATA_MAX];
 } __attribute__((__packed__));
 
-struct uhid_get_report_reply_req {
-	__u32 id;
-	__u16 err;
-	__u16 size;
-	__u8 data[UHID_DATA_MAX];
-} __attribute__((__packed__));
+/*
+ * UHID Events
+ * All UHID events from and to the kernel are encoded as "struct uhid_event".
+ * The "type" field contains a UHID_* type identifier. All payload depends on
+ * that type and can be accessed via ev->u.XYZ accordingly.
+ * If user-space writes short events, they're extended with 0s by the kernel. If
+ * the kernel writes short events, user-space shall extend them with 0s.
+ */
 
 struct uhid_event {
 	__u32 type;
-- 
cgit v1.2.3


From 11c221553080408b203a00b91ad5f647dfb218d1 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Tue, 29 Jul 2014 17:14:24 +0200
Subject: HID: uhid: implement SET_REPORT

We so far lacked support for hid_hw_raw_request(..., HID_REQ_SET_REPORT);
Add support for it and simply forward the request to user-space. Note that
SET_REPORT is synchronous, just like GET_REPORT, even though it does not
provide any data back besides an error code.

If a transport layer does SET_REPORT asynchronously, they can just ACK it
immediately by writing an uhid_set_report_reply to uhid.

This patch re-uses the synchronous uhid-report infrastructure to query
user-space. Note that this means you cannot run SET_REPORT and GET_REPORT
in parallel. However, that has always been a restriction of HID and due to
its blocking nature, this is just fine. Maybe some future transport layer
supports parallel requests (very unlikely), however, until then lets not
over-complicate things and avoid request-lookup-tables.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/uhid.c        | 206 +++++++++++++++++++++++++++++++---------------
 include/uapi/linux/uhid.h |  17 ++++
 2 files changed, 155 insertions(+), 68 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 8bf613e3783d..19511481a7d3 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -49,6 +49,7 @@ struct uhid_device {
 	wait_queue_head_t report_wait;
 	bool report_running;
 	u32 report_id;
+	u32 report_type;
 	struct uhid_event report_buf;
 };
 
@@ -124,95 +125,166 @@ static int uhid_hid_parse(struct hid_device *hid)
 	return hid_parse_report(hid, uhid->rd_data, uhid->rd_size);
 }
 
+/* must be called with report_lock held */
+static int __uhid_report_queue_and_wait(struct uhid_device *uhid,
+					struct uhid_event *ev,
+					__u32 *report_id)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&uhid->qlock, flags);
+	*report_id = ++uhid->report_id;
+	uhid->report_type = ev->type;
+	uhid->report_running = true;
+	uhid_queue(uhid, ev);
+	spin_unlock_irqrestore(&uhid->qlock, flags);
+
+	ret = wait_event_interruptible_timeout(uhid->report_wait,
+				!uhid->report_running || !uhid->running,
+				5 * HZ);
+	if (!ret || !uhid->running || uhid->report_running)
+		ret = -EIO;
+	else if (ret < 0)
+		ret = -ERESTARTSYS;
+	else
+		ret = 0;
+
+	uhid->report_running = false;
+
+	return ret;
+}
+
+static void uhid_report_wake_up(struct uhid_device *uhid, u32 id,
+				const struct uhid_event *ev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&uhid->qlock, flags);
+
+	/* id for old report; drop it silently */
+	if (uhid->report_type != ev->type || uhid->report_id != id)
+		goto unlock;
+	if (!uhid->report_running)
+		goto unlock;
+
+	memcpy(&uhid->report_buf, ev, sizeof(*ev));
+	uhid->report_running = false;
+	wake_up_interruptible(&uhid->report_wait);
+
+unlock:
+	spin_unlock_irqrestore(&uhid->qlock, flags);
+}
+
 static int uhid_hid_get_report(struct hid_device *hid, unsigned char rnum,
-			       __u8 *buf, size_t count, unsigned char rtype)
+			       u8 *buf, size_t count, u8 rtype)
 {
 	struct uhid_device *uhid = hid->driver_data;
-	__u8 report_type;
+	struct uhid_get_report_reply_req *req;
 	struct uhid_event *ev;
-	unsigned long flags;
 	int ret;
-	size_t uninitialized_var(len);
-	struct uhid_get_report_reply_req *req;
 
 	if (!uhid->running)
 		return -EIO;
 
-	switch (rtype) {
-	case HID_FEATURE_REPORT:
-		report_type = UHID_FEATURE_REPORT;
-		break;
-	case HID_OUTPUT_REPORT:
-		report_type = UHID_OUTPUT_REPORT;
-		break;
-	case HID_INPUT_REPORT:
-		report_type = UHID_INPUT_REPORT;
-		break;
-	default:
-		return -EINVAL;
-	}
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->type = UHID_GET_REPORT;
+	ev->u.get_report.rnum = rnum;
+	ev->u.get_report.rtype = rtype;
 
 	ret = mutex_lock_interruptible(&uhid->report_lock);
-	if (ret)
+	if (ret) {
+		kfree(ev);
 		return ret;
+	}
 
-	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
-	if (!ev) {
-		ret = -ENOMEM;
+	/* this _always_ takes ownership of @ev */
+	ret = __uhid_report_queue_and_wait(uhid, ev, &ev->u.get_report.id);
+	if (ret)
 		goto unlock;
+
+	req = &uhid->report_buf.u.get_report_reply;
+	if (req->err) {
+		ret = -EIO;
+	} else {
+		ret = min3(count, (size_t)req->size, (size_t)UHID_DATA_MAX);
+		memcpy(buf, req->data, ret);
 	}
 
-	spin_lock_irqsave(&uhid->qlock, flags);
-	ev->type = UHID_GET_REPORT;
-	ev->u.get_report.id = ++uhid->report_id;
-	ev->u.get_report.rnum = rnum;
-	ev->u.get_report.rtype = report_type;
+unlock:
+	mutex_unlock(&uhid->report_lock);
+	return ret;
+}
 
-	uhid->report_running = true;
-	uhid_queue(uhid, ev);
-	spin_unlock_irqrestore(&uhid->qlock, flags);
+static int uhid_hid_set_report(struct hid_device *hid, unsigned char rnum,
+			       const u8 *buf, size_t count, u8 rtype)
+{
+	struct uhid_device *uhid = hid->driver_data;
+	struct uhid_event *ev;
+	int ret;
 
-	ret = wait_event_interruptible_timeout(uhid->report_wait,
-				!uhid->report_running || !uhid->running,
-				5 * HZ);
+	if (!uhid->running || count > UHID_DATA_MAX)
+		return -EIO;
 
-	if (!ret || !uhid->running) {
-		ret = -EIO;
-	} else if (ret < 0) {
-		ret = -ERESTARTSYS;
-	} else {
-		spin_lock_irqsave(&uhid->qlock, flags);
-		req = &uhid->report_buf.u.get_report_reply;
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev)
+		return -ENOMEM;
 
-		if (req->err) {
-			ret = -EIO;
-		} else {
-			ret = 0;
-			len = min(count,
-				min_t(size_t, req->size, UHID_DATA_MAX));
-			memcpy(buf, req->data, len);
-		}
+	ev->type = UHID_SET_REPORT;
+	ev->u.set_report.rnum = rnum;
+	ev->u.set_report.rtype = rtype;
+	ev->u.set_report.size = count;
+	memcpy(ev->u.set_report.data, buf, count);
 
-		spin_unlock_irqrestore(&uhid->qlock, flags);
+	ret = mutex_lock_interruptible(&uhid->report_lock);
+	if (ret) {
+		kfree(ev);
+		return ret;
 	}
 
-	uhid->report_running = false;
+	/* this _always_ takes ownership of @ev */
+	ret = __uhid_report_queue_and_wait(uhid, ev, &ev->u.set_report.id);
+	if (ret)
+		goto unlock;
+
+	if (uhid->report_buf.u.set_report_reply.err)
+		ret = -EIO;
+	else
+		ret = count;
 
 unlock:
 	mutex_unlock(&uhid->report_lock);
-	return ret ? ret : len;
+	return ret;
 }
 
 static int uhid_hid_raw_request(struct hid_device *hid, unsigned char reportnum,
 				__u8 *buf, size_t len, unsigned char rtype,
 				int reqtype)
 {
+	u8 u_rtype;
+
+	switch (rtype) {
+	case HID_FEATURE_REPORT:
+		u_rtype = UHID_FEATURE_REPORT;
+		break;
+	case HID_OUTPUT_REPORT:
+		u_rtype = UHID_OUTPUT_REPORT;
+		break;
+	case HID_INPUT_REPORT:
+		u_rtype = UHID_INPUT_REPORT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	switch (reqtype) {
 	case HID_REQ_GET_REPORT:
-		return uhid_hid_get_report(hid, reportnum, buf, len, rtype);
+		return uhid_hid_get_report(hid, reportnum, buf, len, u_rtype);
 	case HID_REQ_SET_REPORT:
-		/* TODO: implement proper SET_REPORT functionality */
-		return -ENOSYS;
+		return uhid_hid_set_report(hid, reportnum, buf, len, u_rtype);
 	default:
 		return -EIO;
 	}
@@ -490,25 +562,20 @@ static int uhid_dev_input2(struct uhid_device *uhid, struct uhid_event *ev)
 static int uhid_dev_get_report_reply(struct uhid_device *uhid,
 				     struct uhid_event *ev)
 {
-	unsigned long flags;
-
 	if (!uhid->running)
 		return -EINVAL;
 
-	spin_lock_irqsave(&uhid->qlock, flags);
-
-	/* id for old report; drop it silently */
-	if (uhid->report_id != ev->u.get_report_reply.id)
-		goto unlock;
-	if (!uhid->report_running)
-		goto unlock;
+	uhid_report_wake_up(uhid, ev->u.get_report_reply.id, ev);
+	return 0;
+}
 
-	memcpy(&uhid->report_buf, ev, sizeof(*ev));
-	uhid->report_running = false;
-	wake_up_interruptible(&uhid->report_wait);
+static int uhid_dev_set_report_reply(struct uhid_device *uhid,
+				     struct uhid_event *ev)
+{
+	if (!uhid->running)
+		return -EINVAL;
 
-unlock:
-	spin_unlock_irqrestore(&uhid->qlock, flags);
+	uhid_report_wake_up(uhid, ev->u.set_report_reply.id, ev);
 	return 0;
 }
 
@@ -637,6 +704,9 @@ static ssize_t uhid_char_write(struct file *file, const char __user *buffer,
 	case UHID_GET_REPORT_REPLY:
 		ret = uhid_dev_get_report_reply(uhid, &uhid->input_buf);
 		break;
+	case UHID_SET_REPORT_REPLY:
+		ret = uhid_dev_set_report_reply(uhid, &uhid->input_buf);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 	}
diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h
index 116536eeae62..62aac0e4edf3 100644
--- a/include/uapi/linux/uhid.h
+++ b/include/uapi/linux/uhid.h
@@ -37,6 +37,8 @@ enum uhid_event_type {
 	UHID_GET_REPORT_REPLY,
 	UHID_CREATE2,
 	UHID_INPUT2,
+	UHID_SET_REPORT,
+	UHID_SET_REPORT_REPLY,
 };
 
 struct uhid_create2_req {
@@ -84,6 +86,19 @@ struct uhid_get_report_reply_req {
 	__u8 data[UHID_DATA_MAX];
 } __attribute__((__packed__));
 
+struct uhid_set_report_req {
+	__u32 id;
+	__u8 rnum;
+	__u8 rtype;
+	__u16 size;
+	__u8 data[UHID_DATA_MAX];
+} __attribute__((__packed__));
+
+struct uhid_set_report_reply_req {
+	__u32 id;
+	__u16 err;
+} __attribute__((__packed__));
+
 /*
  * Compat Layer
  * All these commands and requests are obsolete. You should avoid using them in
@@ -165,6 +180,8 @@ struct uhid_event {
 		struct uhid_get_report_reply_req get_report_reply;
 		struct uhid_create2_req create2;
 		struct uhid_input2_req input2;
+		struct uhid_set_report_req set_report;
+		struct uhid_set_report_reply_req set_report_reply;
 	} u;
 } __attribute__((__packed__));
 
-- 
cgit v1.2.3


From c2b2f16c5c62583d4f8904e44c4b30c94a01eaf1 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Tue, 29 Jul 2014 17:14:25 +0200
Subject: HID: uhid: report to user-space whether reports are numbered

This makes UHID_START include a "dev_flags" field that describes details
of the hid-device in the kernel. The first flags we introduce describe
whether a given report-type uses numbered reports. This is useful for
transport layers that force report-numbers and therefore might have to
prefix kernel-provided HID-messages with the report-number.

Currently, only HoG needs this and the spec only talks about "global
report numbers". That is, it's a global boolean not a per-type boolean.
However, given the quirks we already have in kernel-space, a per-type
value seems much more appropriate.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/uhid.c        | 21 ++++++++++++++++++++-
 include/uapi/linux/uhid.h | 11 +++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 19511481a7d3..f6ec5eaf6b89 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -92,8 +92,27 @@ static int uhid_queue_event(struct uhid_device *uhid, __u32 event)
 static int uhid_hid_start(struct hid_device *hid)
 {
 	struct uhid_device *uhid = hid->driver_data;
+	struct uhid_event *ev;
+	unsigned long flags;
+
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->type = UHID_START;
 
-	return uhid_queue_event(uhid, UHID_START);
+	if (hid->report_enum[HID_FEATURE_REPORT].numbered)
+		ev->u.start.dev_flags |= UHID_DEV_NUMBERED_FEATURE_REPORTS;
+	if (hid->report_enum[HID_OUTPUT_REPORT].numbered)
+		ev->u.start.dev_flags |= UHID_DEV_NUMBERED_OUTPUT_REPORTS;
+	if (hid->report_enum[HID_INPUT_REPORT].numbered)
+		ev->u.start.dev_flags |= UHID_DEV_NUMBERED_INPUT_REPORTS;
+
+	spin_lock_irqsave(&uhid->qlock, flags);
+	uhid_queue(uhid, ev);
+	spin_unlock_irqrestore(&uhid->qlock, flags);
+
+	return 0;
 }
 
 static void uhid_hid_stop(struct hid_device *hid)
diff --git a/include/uapi/linux/uhid.h b/include/uapi/linux/uhid.h
index 62aac0e4edf3..aaa86d6bd1dd 100644
--- a/include/uapi/linux/uhid.h
+++ b/include/uapi/linux/uhid.h
@@ -54,6 +54,16 @@ struct uhid_create2_req {
 	__u8 rd_data[HID_MAX_DESCRIPTOR_SIZE];
 } __attribute__((__packed__));
 
+enum uhid_dev_flag {
+	UHID_DEV_NUMBERED_FEATURE_REPORTS			= (1ULL << 0),
+	UHID_DEV_NUMBERED_OUTPUT_REPORTS			= (1ULL << 1),
+	UHID_DEV_NUMBERED_INPUT_REPORTS				= (1ULL << 2),
+};
+
+struct uhid_start_req {
+	__u64 dev_flags;
+};
+
 #define UHID_DATA_MAX 4096
 
 enum uhid_report_type {
@@ -182,6 +192,7 @@ struct uhid_event {
 		struct uhid_input2_req input2;
 		struct uhid_set_report_req set_report;
 		struct uhid_set_report_reply_req set_report_reply;
+		struct uhid_start_req start;
 	} u;
 } __attribute__((__packed__));
 
-- 
cgit v1.2.3


From 96c2737716d586a218bc795fcb79d2e2b6003081 Mon Sep 17 00:00:00 2001
From: Valentina Manea <valentina.manea.m@gmail.com>
Date: Wed, 20 Aug 2014 07:31:00 +0300
Subject: usbip: move usbip kernel code out of staging

At this point, USB/IP kernel code is fully functional
and can be moved out of staging.

Signed-off-by: Valentina Manea <valentina.manea.m@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/Kconfig                  |    2 -
 drivers/staging/Makefile                 |    1 -
 drivers/staging/usbip/Kconfig            |   41 --
 drivers/staging/usbip/Makefile           |   10 -
 drivers/staging/usbip/README             |    7 -
 drivers/staging/usbip/stub.h             |  113 ---
 drivers/staging/usbip/stub_dev.c         |  525 --------------
 drivers/staging/usbip/stub_main.c        |  335 ---------
 drivers/staging/usbip/stub_rx.c          |  594 ---------------
 drivers/staging/usbip/stub_tx.c          |  398 ----------
 drivers/staging/usbip/uapi/usbip.h       |   26 -
 drivers/staging/usbip/usbip_common.c     |  776 --------------------
 drivers/staging/usbip/usbip_common.h     |  335 ---------
 drivers/staging/usbip/usbip_event.c      |  128 ----
 drivers/staging/usbip/usbip_protocol.txt |  358 ---------
 drivers/staging/usbip/vhci.h             |  129 ----
 drivers/staging/usbip/vhci_hcd.c         | 1171 ------------------------------
 drivers/staging/usbip/vhci_rx.c          |  268 -------
 drivers/staging/usbip/vhci_sysfs.c       |  252 -------
 drivers/staging/usbip/vhci_tx.c          |  224 ------
 drivers/usb/Kconfig                      |    2 +
 drivers/usb/Makefile                     |    2 +
 drivers/usb/usbip/Kconfig                |   41 ++
 drivers/usb/usbip/Makefile               |   10 +
 drivers/usb/usbip/README                 |    7 +
 drivers/usb/usbip/stub.h                 |  113 +++
 drivers/usb/usbip/stub_dev.c             |  525 ++++++++++++++
 drivers/usb/usbip/stub_main.c            |  335 +++++++++
 drivers/usb/usbip/stub_rx.c              |  594 +++++++++++++++
 drivers/usb/usbip/stub_tx.c              |  398 ++++++++++
 drivers/usb/usbip/usbip_common.c         |  776 ++++++++++++++++++++
 drivers/usb/usbip/usbip_common.h         |  335 +++++++++
 drivers/usb/usbip/usbip_event.c          |  128 ++++
 drivers/usb/usbip/usbip_protocol.txt     |  358 +++++++++
 drivers/usb/usbip/vhci.h                 |  129 ++++
 drivers/usb/usbip/vhci_hcd.c             | 1171 ++++++++++++++++++++++++++++++
 drivers/usb/usbip/vhci_rx.c              |  268 +++++++
 drivers/usb/usbip/vhci_sysfs.c           |  252 +++++++
 drivers/usb/usbip/vhci_tx.c              |  224 ++++++
 include/uapi/linux/usbip.h               |   26 +
 40 files changed, 5694 insertions(+), 5693 deletions(-)
 delete mode 100644 drivers/staging/usbip/Kconfig
 delete mode 100644 drivers/staging/usbip/Makefile
 delete mode 100644 drivers/staging/usbip/README
 delete mode 100644 drivers/staging/usbip/stub.h
 delete mode 100644 drivers/staging/usbip/stub_dev.c
 delete mode 100644 drivers/staging/usbip/stub_main.c
 delete mode 100644 drivers/staging/usbip/stub_rx.c
 delete mode 100644 drivers/staging/usbip/stub_tx.c
 delete mode 100644 drivers/staging/usbip/uapi/usbip.h
 delete mode 100644 drivers/staging/usbip/usbip_common.c
 delete mode 100644 drivers/staging/usbip/usbip_common.h
 delete mode 100644 drivers/staging/usbip/usbip_event.c
 delete mode 100644 drivers/staging/usbip/usbip_protocol.txt
 delete mode 100644 drivers/staging/usbip/vhci.h
 delete mode 100644 drivers/staging/usbip/vhci_hcd.c
 delete mode 100644 drivers/staging/usbip/vhci_rx.c
 delete mode 100644 drivers/staging/usbip/vhci_sysfs.c
 delete mode 100644 drivers/staging/usbip/vhci_tx.c
 create mode 100644 drivers/usb/usbip/Kconfig
 create mode 100644 drivers/usb/usbip/Makefile
 create mode 100644 drivers/usb/usbip/README
 create mode 100644 drivers/usb/usbip/stub.h
 create mode 100644 drivers/usb/usbip/stub_dev.c
 create mode 100644 drivers/usb/usbip/stub_main.c
 create mode 100644 drivers/usb/usbip/stub_rx.c
 create mode 100644 drivers/usb/usbip/stub_tx.c
 create mode 100644 drivers/usb/usbip/usbip_common.c
 create mode 100644 drivers/usb/usbip/usbip_common.h
 create mode 100644 drivers/usb/usbip/usbip_event.c
 create mode 100644 drivers/usb/usbip/usbip_protocol.txt
 create mode 100644 drivers/usb/usbip/vhci.h
 create mode 100644 drivers/usb/usbip/vhci_hcd.c
 create mode 100644 drivers/usb/usbip/vhci_rx.c
 create mode 100644 drivers/usb/usbip/vhci_sysfs.c
 create mode 100644 drivers/usb/usbip/vhci_tx.c
 create mode 100644 include/uapi/linux/usbip.h

(limited to 'include/uapi')

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 2c486ea6236b..35b494f5667f 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -28,8 +28,6 @@ source "drivers/staging/et131x/Kconfig"
 
 source "drivers/staging/slicoss/Kconfig"
 
-source "drivers/staging/usbip/Kconfig"
-
 source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 1e1a3a10faf7..e66a5dbd9b02 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_STAGING)		+= staging.o
 obj-y				+= media/
 obj-$(CONFIG_ET131X)		+= et131x/
 obj-$(CONFIG_SLICOSS)		+= slicoss/
-obj-$(CONFIG_USBIP_CORE)	+= usbip/
 obj-$(CONFIG_PRISM2_USB)	+= wlan-ng/
 obj-$(CONFIG_COMEDI)		+= comedi/
 obj-$(CONFIG_FB_OLPC_DCON)	+= olpc_dcon/
diff --git a/drivers/staging/usbip/Kconfig b/drivers/staging/usbip/Kconfig
deleted file mode 100644
index bd99e9e47e50..000000000000
--- a/drivers/staging/usbip/Kconfig
+++ /dev/null
@@ -1,41 +0,0 @@
-config USBIP_CORE
-	tristate "USB/IP support"
-	depends on USB && NET
-	---help---
-	  This enables pushing USB packets over IP to allow remote
-	  machines direct access to USB devices. It provides the
-	  USB/IP core that is required by both drivers.
-
-	  For more details, and to get the userspace utility
-	  programs, please see <http://usbip.sourceforge.net/>.
-
-	  To compile this as a module, choose M here: the module will
-	  be called usbip-core.
-
-	  If unsure, say N.
-
-config USBIP_VHCI_HCD
-	tristate "VHCI hcd"
-	depends on USBIP_CORE
-	---help---
-	  This enables the USB/IP virtual host controller driver,
-	  which is run on the remote machine.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called vhci-hcd.
-
-config USBIP_HOST
-	tristate "Host driver"
-	depends on USBIP_CORE
-	---help---
-	  This enables the USB/IP host driver, which is run on the
-	  machine that is sharing the USB devices.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called usbip-host.
-
-config USBIP_DEBUG
-	bool "Debug messages for USB/IP"
-	depends on USBIP_CORE
-	---help---
-	  This enables the debug messages from the USB/IP drivers.
diff --git a/drivers/staging/usbip/Makefile b/drivers/staging/usbip/Makefile
deleted file mode 100644
index 9ecd61545be1..000000000000
--- a/drivers/staging/usbip/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-ccflags-$(CONFIG_USBIP_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_USBIP_CORE) += usbip-core.o
-usbip-core-y := usbip_common.o usbip_event.o
-
-obj-$(CONFIG_USBIP_VHCI_HCD) += vhci-hcd.o
-vhci-hcd-y := vhci_sysfs.o vhci_tx.o vhci_rx.o vhci_hcd.o
-
-obj-$(CONFIG_USBIP_HOST) += usbip-host.o
-usbip-host-y := stub_dev.o stub_main.o stub_rx.o stub_tx.o
diff --git a/drivers/staging/usbip/README b/drivers/staging/usbip/README
deleted file mode 100644
index 41a2cf2e77a6..000000000000
--- a/drivers/staging/usbip/README
+++ /dev/null
@@ -1,7 +0,0 @@
-TODO:
-	- more discussion about the protocol
-	- testing
-	- review of the userspace interface
-	- document the protocol
-
-Please send patches for this code to Greg Kroah-Hartman <greg@kroah.com>
diff --git a/drivers/staging/usbip/stub.h b/drivers/staging/usbip/stub.h
deleted file mode 100644
index 266e2b0ce9a8..000000000000
--- a/drivers/staging/usbip/stub.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#ifndef __USBIP_STUB_H
-#define __USBIP_STUB_H
-
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/usb.h>
-#include <linux/wait.h>
-
-#define STUB_BUSID_OTHER 0
-#define STUB_BUSID_REMOV 1
-#define STUB_BUSID_ADDED 2
-#define STUB_BUSID_ALLOC 3
-
-struct stub_device {
-	struct usb_interface *interface;
-	struct usb_device *udev;
-
-	struct usbip_device ud;
-	__u32 devid;
-
-	/*
-	 * stub_priv preserves private data of each urb.
-	 * It is allocated as stub_priv_cache and assigned to urb->context.
-	 *
-	 * stub_priv is always linked to any one of 3 lists;
-	 *	priv_init: linked to this until the comletion of a urb.
-	 *	priv_tx  : linked to this after the completion of a urb.
-	 *	priv_free: linked to this after the sending of the result.
-	 *
-	 * Any of these list operations should be locked by priv_lock.
-	 */
-	spinlock_t priv_lock;
-	struct list_head priv_init;
-	struct list_head priv_tx;
-	struct list_head priv_free;
-
-	/* see comments for unlinking in stub_rx.c */
-	struct list_head unlink_tx;
-	struct list_head unlink_free;
-
-	wait_queue_head_t tx_waitq;
-};
-
-/* private data into urb->priv */
-struct stub_priv {
-	unsigned long seqnum;
-	struct list_head list;
-	struct stub_device *sdev;
-	struct urb *urb;
-
-	int unlinking;
-};
-
-struct stub_unlink {
-	unsigned long seqnum;
-	struct list_head list;
-	__u32 status;
-};
-
-/* same as SYSFS_BUS_ID_SIZE */
-#define BUSID_SIZE 32
-
-struct bus_id_priv {
-	char name[BUSID_SIZE];
-	char status;
-	int interf_count;
-	struct stub_device *sdev;
-	struct usb_device *udev;
-	char shutdown_busid;
-};
-
-/* stub_priv is allocated from stub_priv_cache */
-extern struct kmem_cache *stub_priv_cache;
-
-/* stub_dev.c */
-extern struct usb_device_driver stub_driver;
-
-/* stub_main.c */
-struct bus_id_priv *get_busid_priv(const char *busid);
-int del_match_busid(char *busid);
-void stub_device_cleanup_urbs(struct stub_device *sdev);
-
-/* stub_rx.c */
-int stub_rx_loop(void *data);
-
-/* stub_tx.c */
-void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum,
-			     __u32 status);
-void stub_complete(struct urb *urb);
-int stub_tx_loop(void *data);
-
-#endif /* __USBIP_STUB_H */
diff --git a/drivers/staging/usbip/stub_dev.c b/drivers/staging/usbip/stub_dev.c
deleted file mode 100644
index 51d0c7188738..000000000000
--- a/drivers/staging/usbip/stub_dev.c
+++ /dev/null
@@ -1,525 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/device.h>
-#include <linux/file.h>
-#include <linux/kthread.h>
-#include <linux/module.h>
-
-#include "usbip_common.h"
-#include "stub.h"
-
-/*
- * Define device IDs here if you want to explicitly limit exportable devices.
- * In most cases, wildcard matching will be okay because driver binding can be
- * changed dynamically by a userland program.
- */
-static struct usb_device_id stub_table[] = {
-#if 0
-	/* just an example */
-	{ USB_DEVICE(0x05ac, 0x0301) },   /* Mac 1 button mouse */
-	{ USB_DEVICE(0x0430, 0x0009) },   /* Plat Home Keyboard */
-	{ USB_DEVICE(0x059b, 0x0001) },   /* Iomega USB Zip 100 */
-	{ USB_DEVICE(0x04b3, 0x4427) },   /* IBM USB CD-ROM */
-	{ USB_DEVICE(0x05a9, 0xa511) },   /* LifeView USB cam */
-	{ USB_DEVICE(0x55aa, 0x0201) },   /* Imation card reader */
-	{ USB_DEVICE(0x046d, 0x0870) },   /* Qcam Express(QV-30) */
-	{ USB_DEVICE(0x04bb, 0x0101) },   /* IO-DATA HD 120GB */
-	{ USB_DEVICE(0x04bb, 0x0904) },   /* IO-DATA USB-ET/TX */
-	{ USB_DEVICE(0x04bb, 0x0201) },   /* IO-DATA USB-ET/TX */
-	{ USB_DEVICE(0x08bb, 0x2702) },   /* ONKYO USB Speaker */
-	{ USB_DEVICE(0x046d, 0x08b2) },   /* Logicool Qcam 4000 Pro */
-#endif
-	/* magic for wild card */
-	{ .driver_info = 1 },
-	{ 0, }                                     /* Terminating entry */
-};
-MODULE_DEVICE_TABLE(usb, stub_table);
-
-/*
- * usbip_status shows the status of usbip-host as long as this driver is bound
- * to the target device.
- */
-static ssize_t usbip_status_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct stub_device *sdev = dev_get_drvdata(dev);
-	int status;
-
-	if (!sdev) {
-		dev_err(dev, "sdev is null\n");
-		return -ENODEV;
-	}
-
-	spin_lock_irq(&sdev->ud.lock);
-	status = sdev->ud.status;
-	spin_unlock_irq(&sdev->ud.lock);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", status);
-}
-static DEVICE_ATTR_RO(usbip_status);
-
-/*
- * usbip_sockfd gets a socket descriptor of an established TCP connection that
- * is used to transfer usbip requests by kernel threads. -1 is a magic number
- * by which usbip connection is finished.
- */
-static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
-{
-	struct stub_device *sdev = dev_get_drvdata(dev);
-	int sockfd = 0;
-	struct socket *socket;
-	int rv;
-
-	if (!sdev) {
-		dev_err(dev, "sdev is null\n");
-		return -ENODEV;
-	}
-
-	rv = sscanf(buf, "%d", &sockfd);
-	if (rv != 1)
-		return -EINVAL;
-
-	if (sockfd != -1) {
-		int err;
-
-		dev_info(dev, "stub up\n");
-
-		spin_lock_irq(&sdev->ud.lock);
-
-		if (sdev->ud.status != SDEV_ST_AVAILABLE) {
-			dev_err(dev, "not ready\n");
-			goto err;
-		}
-
-		socket = sockfd_lookup(sockfd, &err);
-		if (!socket)
-			goto err;
-
-		sdev->ud.tcp_socket = socket;
-
-		spin_unlock_irq(&sdev->ud.lock);
-
-		sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, &sdev->ud,
-						  "stub_rx");
-		sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, &sdev->ud,
-						  "stub_tx");
-
-		spin_lock_irq(&sdev->ud.lock);
-		sdev->ud.status = SDEV_ST_USED;
-		spin_unlock_irq(&sdev->ud.lock);
-
-	} else {
-		dev_info(dev, "stub down\n");
-
-		spin_lock_irq(&sdev->ud.lock);
-		if (sdev->ud.status != SDEV_ST_USED)
-			goto err;
-
-		spin_unlock_irq(&sdev->ud.lock);
-
-		usbip_event_add(&sdev->ud, SDEV_EVENT_DOWN);
-	}
-
-	return count;
-
-err:
-	spin_unlock_irq(&sdev->ud.lock);
-	return -EINVAL;
-}
-static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd);
-
-static int stub_add_files(struct device *dev)
-{
-	int err = 0;
-
-	err = device_create_file(dev, &dev_attr_usbip_status);
-	if (err)
-		goto err_status;
-
-	err = device_create_file(dev, &dev_attr_usbip_sockfd);
-	if (err)
-		goto err_sockfd;
-
-	err = device_create_file(dev, &dev_attr_usbip_debug);
-	if (err)
-		goto err_debug;
-
-	return 0;
-
-err_debug:
-	device_remove_file(dev, &dev_attr_usbip_sockfd);
-err_sockfd:
-	device_remove_file(dev, &dev_attr_usbip_status);
-err_status:
-	return err;
-}
-
-static void stub_remove_files(struct device *dev)
-{
-	device_remove_file(dev, &dev_attr_usbip_status);
-	device_remove_file(dev, &dev_attr_usbip_sockfd);
-	device_remove_file(dev, &dev_attr_usbip_debug);
-}
-
-static void stub_shutdown_connection(struct usbip_device *ud)
-{
-	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
-
-	/*
-	 * When removing an exported device, kernel panic sometimes occurred
-	 * and then EIP was sk_wait_data of stub_rx thread. Is this because
-	 * sk_wait_data returned though stub_rx thread was already finished by
-	 * step 1?
-	 */
-	if (ud->tcp_socket) {
-		dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
-			ud->tcp_socket);
-		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
-	}
-
-	/* 1. stop threads */
-	if (ud->tcp_rx) {
-		kthread_stop_put(ud->tcp_rx);
-		ud->tcp_rx = NULL;
-	}
-	if (ud->tcp_tx) {
-		kthread_stop_put(ud->tcp_tx);
-		ud->tcp_tx = NULL;
-	}
-
-	/*
-	 * 2. close the socket
-	 *
-	 * tcp_socket is freed after threads are killed so that usbip_xmit does
-	 * not touch NULL socket.
-	 */
-	if (ud->tcp_socket) {
-		sockfd_put(ud->tcp_socket);
-		ud->tcp_socket = NULL;
-	}
-
-	/* 3. free used data */
-	stub_device_cleanup_urbs(sdev);
-
-	/* 4. free stub_unlink */
-	{
-		unsigned long flags;
-		struct stub_unlink *unlink, *tmp;
-
-		spin_lock_irqsave(&sdev->priv_lock, flags);
-		list_for_each_entry_safe(unlink, tmp, &sdev->unlink_tx, list) {
-			list_del(&unlink->list);
-			kfree(unlink);
-		}
-		list_for_each_entry_safe(unlink, tmp, &sdev->unlink_free,
-					 list) {
-			list_del(&unlink->list);
-			kfree(unlink);
-		}
-		spin_unlock_irqrestore(&sdev->priv_lock, flags);
-	}
-}
-
-static void stub_device_reset(struct usbip_device *ud)
-{
-	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
-	struct usb_device *udev = sdev->udev;
-	int ret;
-
-	dev_dbg(&udev->dev, "device reset");
-
-	ret = usb_lock_device_for_reset(udev, sdev->interface);
-	if (ret < 0) {
-		dev_err(&udev->dev, "lock for reset\n");
-		spin_lock_irq(&ud->lock);
-		ud->status = SDEV_ST_ERROR;
-		spin_unlock_irq(&ud->lock);
-		return;
-	}
-
-	/* try to reset the device */
-	ret = usb_reset_device(udev);
-	usb_unlock_device(udev);
-
-	spin_lock_irq(&ud->lock);
-	if (ret) {
-		dev_err(&udev->dev, "device reset\n");
-		ud->status = SDEV_ST_ERROR;
-	} else {
-		dev_info(&udev->dev, "device reset\n");
-		ud->status = SDEV_ST_AVAILABLE;
-	}
-	spin_unlock_irq(&ud->lock);
-}
-
-static void stub_device_unusable(struct usbip_device *ud)
-{
-	spin_lock_irq(&ud->lock);
-	ud->status = SDEV_ST_ERROR;
-	spin_unlock_irq(&ud->lock);
-}
-
-/**
- * stub_device_alloc - allocate a new stub_device struct
- * @interface: usb_interface of a new device
- *
- * Allocates and initializes a new stub_device struct.
- */
-static struct stub_device *stub_device_alloc(struct usb_device *udev)
-{
-	struct stub_device *sdev;
-	int busnum = udev->bus->busnum;
-	int devnum = udev->devnum;
-
-	dev_dbg(&udev->dev, "allocating stub device");
-
-	/* yes, it's a new device */
-	sdev = kzalloc(sizeof(struct stub_device), GFP_KERNEL);
-	if (!sdev)
-		return NULL;
-
-	sdev->udev = usb_get_dev(udev);
-
-	/*
-	 * devid is defined with devnum when this driver is first allocated.
-	 * devnum may change later if a device is reset. However, devid never
-	 * changes during a usbip connection.
-	 */
-	sdev->devid		= (busnum << 16) | devnum;
-	sdev->ud.side		= USBIP_STUB;
-	sdev->ud.status		= SDEV_ST_AVAILABLE;
-	spin_lock_init(&sdev->ud.lock);
-	sdev->ud.tcp_socket	= NULL;
-
-	INIT_LIST_HEAD(&sdev->priv_init);
-	INIT_LIST_HEAD(&sdev->priv_tx);
-	INIT_LIST_HEAD(&sdev->priv_free);
-	INIT_LIST_HEAD(&sdev->unlink_free);
-	INIT_LIST_HEAD(&sdev->unlink_tx);
-	spin_lock_init(&sdev->priv_lock);
-
-	init_waitqueue_head(&sdev->tx_waitq);
-
-	sdev->ud.eh_ops.shutdown = stub_shutdown_connection;
-	sdev->ud.eh_ops.reset    = stub_device_reset;
-	sdev->ud.eh_ops.unusable = stub_device_unusable;
-
-	usbip_start_eh(&sdev->ud);
-
-	dev_dbg(&udev->dev, "register new device\n");
-
-	return sdev;
-}
-
-static void stub_device_free(struct stub_device *sdev)
-{
-	kfree(sdev);
-}
-
-static int stub_probe(struct usb_device *udev)
-{
-	struct stub_device *sdev = NULL;
-	const char *udev_busid = dev_name(&udev->dev);
-	int err = 0;
-	struct bus_id_priv *busid_priv;
-	int rc;
-
-	dev_dbg(&udev->dev, "Enter\n");
-
-	/* check we should claim or not by busid_table */
-	busid_priv = get_busid_priv(udev_busid);
-	if (!busid_priv || (busid_priv->status == STUB_BUSID_REMOV) ||
-	    (busid_priv->status == STUB_BUSID_OTHER)) {
-		dev_info(&udev->dev,
-			"%s is not in match_busid table... skip!\n",
-			udev_busid);
-
-		/*
-		 * Return value should be ENODEV or ENOXIO to continue trying
-		 * other matched drivers by the driver core.
-		 * See driver_probe_device() in driver/base/dd.c
-		 */
-		return -ENODEV;
-	}
-
-	if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) {
-		dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n",
-			 udev_busid);
-		return -ENODEV;
-	}
-
-	if (!strcmp(udev->bus->bus_name, "vhci_hcd")) {
-		dev_dbg(&udev->dev,
-			"%s is attached on vhci_hcd... skip!\n",
-			udev_busid);
-
-		return -ENODEV;
-	}
-
-	/* ok, this is my device */
-	sdev = stub_device_alloc(udev);
-	if (!sdev)
-		return -ENOMEM;
-
-	dev_info(&udev->dev,
-		"usbip-host: register new device (bus %u dev %u)\n",
-		udev->bus->busnum, udev->devnum);
-
-	busid_priv->shutdown_busid = 0;
-
-	/* set private data to usb_device */
-	dev_set_drvdata(&udev->dev, sdev);
-	busid_priv->sdev = sdev;
-	busid_priv->udev = udev;
-
-	/*
-	 * Claim this hub port.
-	 * It doesn't matter what value we pass as owner
-	 * (struct dev_state) as long as it is unique.
-	 */
-	rc = usb_hub_claim_port(udev->parent, udev->portnum,
-			(struct usb_dev_state *) udev);
-	if (rc) {
-		dev_dbg(&udev->dev, "unable to claim port\n");
-		return rc;
-	}
-
-	err = stub_add_files(&udev->dev);
-	if (err) {
-		dev_err(&udev->dev, "stub_add_files for %s\n", udev_busid);
-		dev_set_drvdata(&udev->dev, NULL);
-		usb_put_dev(udev);
-		kthread_stop_put(sdev->ud.eh);
-
-		busid_priv->sdev = NULL;
-		stub_device_free(sdev);
-		return err;
-	}
-	busid_priv->status = STUB_BUSID_ALLOC;
-
-	return 0;
-}
-
-static void shutdown_busid(struct bus_id_priv *busid_priv)
-{
-	if (busid_priv->sdev && !busid_priv->shutdown_busid) {
-		busid_priv->shutdown_busid = 1;
-		usbip_event_add(&busid_priv->sdev->ud, SDEV_EVENT_REMOVED);
-
-		/* wait for the stop of the event handler */
-		usbip_stop_eh(&busid_priv->sdev->ud);
-	}
-}
-
-/*
- * called in usb_disconnect() or usb_deregister()
- * but only if actconfig(active configuration) exists
- */
-static void stub_disconnect(struct usb_device *udev)
-{
-	struct stub_device *sdev;
-	const char *udev_busid = dev_name(&udev->dev);
-	struct bus_id_priv *busid_priv;
-	int rc;
-
-	dev_dbg(&udev->dev, "Enter\n");
-
-	busid_priv = get_busid_priv(udev_busid);
-	if (!busid_priv) {
-		BUG();
-		return;
-	}
-
-	sdev = dev_get_drvdata(&udev->dev);
-
-	/* get stub_device */
-	if (!sdev) {
-		dev_err(&udev->dev, "could not get device");
-		return;
-	}
-
-	dev_set_drvdata(&udev->dev, NULL);
-
-	/*
-	 * NOTE: rx/tx threads are invoked for each usb_device.
-	 */
-	stub_remove_files(&udev->dev);
-
-	/* release port */
-	rc = usb_hub_release_port(udev->parent, udev->portnum,
-				  (struct usb_dev_state *) udev);
-	if (rc) {
-		dev_dbg(&udev->dev, "unable to release port\n");
-		return;
-	}
-
-	/* If usb reset is called from event handler */
-	if (busid_priv->sdev->ud.eh == current)
-		return;
-
-	/* shutdown the current connection */
-	shutdown_busid(busid_priv);
-
-	usb_put_dev(sdev->udev);
-
-	/* free sdev */
-	busid_priv->sdev = NULL;
-	stub_device_free(sdev);
-
-	if (busid_priv->status == STUB_BUSID_ALLOC) {
-		busid_priv->status = STUB_BUSID_ADDED;
-	} else {
-		busid_priv->status = STUB_BUSID_OTHER;
-		del_match_busid((char *)udev_busid);
-	}
-}
-
-#ifdef CONFIG_PM
-
-/* These functions need usb_port_suspend and usb_port_resume,
- * which reside in drivers/usb/core/usb.h. Skip for now. */
-
-static int stub_suspend(struct usb_device *udev, pm_message_t message)
-{
-	dev_dbg(&udev->dev, "stub_suspend\n");
-
-	return 0;
-}
-
-static int stub_resume(struct usb_device *udev, pm_message_t message)
-{
-	dev_dbg(&udev->dev, "stub_resume\n");
-
-	return 0;
-}
-
-#endif	/* CONFIG_PM */
-
-struct usb_device_driver stub_driver = {
-	.name		= "usbip-host",
-	.probe		= stub_probe,
-	.disconnect	= stub_disconnect,
-#ifdef CONFIG_PM
-	.suspend	= stub_suspend,
-	.resume		= stub_resume,
-#endif
-	.supports_autosuspend	=	0,
-};
diff --git a/drivers/staging/usbip/stub_main.c b/drivers/staging/usbip/stub_main.c
deleted file mode 100644
index 44ab43fc4fcc..000000000000
--- a/drivers/staging/usbip/stub_main.c
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/device.h>
-
-#include "usbip_common.h"
-#include "stub.h"
-
-#define DRIVER_AUTHOR "Takahiro Hirofuchi"
-#define DRIVER_DESC "USB/IP Host Driver"
-
-struct kmem_cache *stub_priv_cache;
-/*
- * busid_tables defines matching busids that usbip can grab. A user can change
- * dynamically what device is locally used and what device is exported to a
- * remote host.
- */
-#define MAX_BUSID 16
-static struct bus_id_priv busid_table[MAX_BUSID];
-static spinlock_t busid_table_lock;
-
-static void init_busid_table(void)
-{
-	/*
-	 * This also sets the bus_table[i].status to
-	 * STUB_BUSID_OTHER, which is 0.
-	 */
-	memset(busid_table, 0, sizeof(busid_table));
-
-	spin_lock_init(&busid_table_lock);
-}
-
-/*
- * Find the index of the busid by name.
- * Must be called with busid_table_lock held.
- */
-static int get_busid_idx(const char *busid)
-{
-	int i;
-	int idx = -1;
-
-	for (i = 0; i < MAX_BUSID; i++)
-		if (busid_table[i].name[0])
-			if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) {
-				idx = i;
-				break;
-			}
-	return idx;
-}
-
-struct bus_id_priv *get_busid_priv(const char *busid)
-{
-	int idx;
-	struct bus_id_priv *bid = NULL;
-
-	spin_lock(&busid_table_lock);
-	idx = get_busid_idx(busid);
-	if (idx >= 0)
-		bid = &(busid_table[idx]);
-	spin_unlock(&busid_table_lock);
-
-	return bid;
-}
-
-static int add_match_busid(char *busid)
-{
-	int i;
-	int ret = -1;
-
-	spin_lock(&busid_table_lock);
-	/* already registered? */
-	if (get_busid_idx(busid) >= 0) {
-		ret = 0;
-		goto out;
-	}
-
-	for (i = 0; i < MAX_BUSID; i++)
-		if (!busid_table[i].name[0]) {
-			strlcpy(busid_table[i].name, busid, BUSID_SIZE);
-			if ((busid_table[i].status != STUB_BUSID_ALLOC) &&
-			    (busid_table[i].status != STUB_BUSID_REMOV))
-				busid_table[i].status = STUB_BUSID_ADDED;
-			ret = 0;
-			break;
-		}
-
-out:
-	spin_unlock(&busid_table_lock);
-
-	return ret;
-}
-
-int del_match_busid(char *busid)
-{
-	int idx;
-	int ret = -1;
-
-	spin_lock(&busid_table_lock);
-	idx = get_busid_idx(busid);
-	if (idx < 0)
-		goto out;
-
-	/* found */
-	ret = 0;
-
-	if (busid_table[idx].status == STUB_BUSID_OTHER)
-		memset(busid_table[idx].name, 0, BUSID_SIZE);
-
-	if ((busid_table[idx].status != STUB_BUSID_OTHER) &&
-	    (busid_table[idx].status != STUB_BUSID_ADDED))
-		busid_table[idx].status = STUB_BUSID_REMOV;
-
-out:
-	spin_unlock(&busid_table_lock);
-
-	return ret;
-}
-
-static ssize_t show_match_busid(struct device_driver *drv, char *buf)
-{
-	int i;
-	char *out = buf;
-
-	spin_lock(&busid_table_lock);
-	for (i = 0; i < MAX_BUSID; i++)
-		if (busid_table[i].name[0])
-			out += sprintf(out, "%s ", busid_table[i].name);
-	spin_unlock(&busid_table_lock);
-	out += sprintf(out, "\n");
-
-	return out - buf;
-}
-
-static ssize_t store_match_busid(struct device_driver *dev, const char *buf,
-				 size_t count)
-{
-	int len;
-	char busid[BUSID_SIZE];
-
-	if (count < 5)
-		return -EINVAL;
-
-	/* busid needs to include \0 termination */
-	len = strlcpy(busid, buf + 4, BUSID_SIZE);
-	if (sizeof(busid) <= len)
-		return -EINVAL;
-
-	if (!strncmp(buf, "add ", 4)) {
-		if (add_match_busid(busid) < 0)
-			return -ENOMEM;
-
-		pr_debug("add busid %s\n", busid);
-		return count;
-	}
-
-	if (!strncmp(buf, "del ", 4)) {
-		if (del_match_busid(busid) < 0)
-			return -ENODEV;
-
-		pr_debug("del busid %s\n", busid);
-		return count;
-	}
-
-	return -EINVAL;
-}
-static DRIVER_ATTR(match_busid, S_IRUSR | S_IWUSR, show_match_busid,
-		   store_match_busid);
-
-static ssize_t rebind_store(struct device_driver *dev, const char *buf,
-				 size_t count)
-{
-	int ret;
-	int len;
-	struct bus_id_priv *bid;
-
-	/* buf length should be less that BUSID_SIZE */
-	len = strnlen(buf, BUSID_SIZE);
-
-	if (!(len < BUSID_SIZE))
-		return -EINVAL;
-
-	bid = get_busid_priv(buf);
-	if (!bid)
-		return -ENODEV;
-
-	ret = device_attach(&bid->udev->dev);
-	if (ret < 0) {
-		dev_err(&bid->udev->dev, "rebind failed\n");
-		return ret;
-	}
-
-	return count;
-}
-
-static DRIVER_ATTR_WO(rebind);
-
-static struct stub_priv *stub_priv_pop_from_listhead(struct list_head *listhead)
-{
-	struct stub_priv *priv, *tmp;
-
-	list_for_each_entry_safe(priv, tmp, listhead, list) {
-		list_del(&priv->list);
-		return priv;
-	}
-
-	return NULL;
-}
-
-static struct stub_priv *stub_priv_pop(struct stub_device *sdev)
-{
-	unsigned long flags;
-	struct stub_priv *priv;
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	priv = stub_priv_pop_from_listhead(&sdev->priv_init);
-	if (priv)
-		goto done;
-
-	priv = stub_priv_pop_from_listhead(&sdev->priv_tx);
-	if (priv)
-		goto done;
-
-	priv = stub_priv_pop_from_listhead(&sdev->priv_free);
-
-done:
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return priv;
-}
-
-void stub_device_cleanup_urbs(struct stub_device *sdev)
-{
-	struct stub_priv *priv;
-	struct urb *urb;
-
-	dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
-
-	while ((priv = stub_priv_pop(sdev))) {
-		urb = priv->urb;
-		dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
-		usb_kill_urb(urb);
-
-		kmem_cache_free(stub_priv_cache, priv);
-
-		kfree(urb->transfer_buffer);
-		kfree(urb->setup_packet);
-		usb_free_urb(urb);
-	}
-}
-
-static int __init usbip_host_init(void)
-{
-	int ret;
-
-	init_busid_table();
-
-	stub_priv_cache = KMEM_CACHE(stub_priv, SLAB_HWCACHE_ALIGN);
-	if (!stub_priv_cache) {
-		pr_err("kmem_cache_create failed\n");
-		return -ENOMEM;
-	}
-
-	ret = usb_register_device_driver(&stub_driver, THIS_MODULE);
-	if (ret) {
-		pr_err("usb_register failed %d\n", ret);
-		goto err_usb_register;
-	}
-
-	ret = driver_create_file(&stub_driver.drvwrap.driver,
-				 &driver_attr_match_busid);
-	if (ret) {
-		pr_err("driver_create_file failed\n");
-		goto err_create_file;
-	}
-
-	ret = driver_create_file(&stub_driver.drvwrap.driver,
-				 &driver_attr_rebind);
-	if (ret) {
-		pr_err("driver_create_file failed\n");
-		goto err_create_file;
-	}
-
-	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
-	return ret;
-
-err_create_file:
-	usb_deregister_device_driver(&stub_driver);
-err_usb_register:
-	kmem_cache_destroy(stub_priv_cache);
-	return ret;
-}
-
-static void __exit usbip_host_exit(void)
-{
-	driver_remove_file(&stub_driver.drvwrap.driver,
-			   &driver_attr_match_busid);
-
-	driver_remove_file(&stub_driver.drvwrap.driver,
-			   &driver_attr_rebind);
-
-	/*
-	 * deregister() calls stub_disconnect() for all devices. Device
-	 * specific data is cleared in stub_disconnect().
-	 */
-	usb_deregister_device_driver(&stub_driver);
-
-	kmem_cache_destroy(stub_priv_cache);
-}
-
-module_init(usbip_host_init);
-module_exit(usbip_host_exit);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/staging/usbip/stub_rx.c b/drivers/staging/usbip/stub_rx.c
deleted file mode 100644
index 00e475c51a12..000000000000
--- a/drivers/staging/usbip/stub_rx.c
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <asm/byteorder.h>
-#include <linux/kthread.h>
-#include <linux/usb.h>
-#include <linux/usb/hcd.h>
-
-#include "usbip_common.h"
-#include "stub.h"
-
-static int is_clear_halt_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-
-	 return (req->bRequest == USB_REQ_CLEAR_FEATURE) &&
-		 (req->bRequestType == USB_RECIP_ENDPOINT) &&
-		 (req->wValue == USB_ENDPOINT_HALT);
-}
-
-static int is_set_interface_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-
-	return (req->bRequest == USB_REQ_SET_INTERFACE) &&
-		(req->bRequestType == USB_RECIP_INTERFACE);
-}
-
-static int is_set_configuration_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-
-	return (req->bRequest == USB_REQ_SET_CONFIGURATION) &&
-		(req->bRequestType == USB_RECIP_DEVICE);
-}
-
-static int is_reset_device_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-	__u16 value;
-	__u16 index;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-	value = le16_to_cpu(req->wValue);
-	index = le16_to_cpu(req->wIndex);
-
-	if ((req->bRequest == USB_REQ_SET_FEATURE) &&
-	    (req->bRequestType == USB_RT_PORT) &&
-	    (value == USB_PORT_FEAT_RESET)) {
-		usbip_dbg_stub_rx("reset_device_cmd, port %u\n", index);
-		return 1;
-	} else
-		return 0;
-}
-
-static int tweak_clear_halt_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-	int target_endp;
-	int target_dir;
-	int target_pipe;
-	int ret;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-
-	/*
-	 * The stalled endpoint is specified in the wIndex value. The endpoint
-	 * of the urb is the target of this clear_halt request (i.e., control
-	 * endpoint).
-	 */
-	target_endp = le16_to_cpu(req->wIndex) & 0x000f;
-
-	/* the stalled endpoint direction is IN or OUT?. USB_DIR_IN is 0x80.  */
-	target_dir = le16_to_cpu(req->wIndex) & 0x0080;
-
-	if (target_dir)
-		target_pipe = usb_rcvctrlpipe(urb->dev, target_endp);
-	else
-		target_pipe = usb_sndctrlpipe(urb->dev, target_endp);
-
-	ret = usb_clear_halt(urb->dev, target_pipe);
-	if (ret < 0)
-		dev_err(&urb->dev->dev,
-			"usb_clear_halt error: devnum %d endp %d ret %d\n",
-			urb->dev->devnum, target_endp, ret);
-	else
-		dev_info(&urb->dev->dev,
-			 "usb_clear_halt done: devnum %d endp %d\n",
-			 urb->dev->devnum, target_endp);
-
-	return ret;
-}
-
-static int tweak_set_interface_cmd(struct urb *urb)
-{
-	struct usb_ctrlrequest *req;
-	__u16 alternate;
-	__u16 interface;
-	int ret;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-	alternate = le16_to_cpu(req->wValue);
-	interface = le16_to_cpu(req->wIndex);
-
-	usbip_dbg_stub_rx("set_interface: inf %u alt %u\n",
-			  interface, alternate);
-
-	ret = usb_set_interface(urb->dev, interface, alternate);
-	if (ret < 0)
-		dev_err(&urb->dev->dev,
-			"usb_set_interface error: inf %u alt %u ret %d\n",
-			interface, alternate, ret);
-	else
-		dev_info(&urb->dev->dev,
-			"usb_set_interface done: inf %u alt %u\n",
-			interface, alternate);
-
-	return ret;
-}
-
-static int tweak_set_configuration_cmd(struct urb *urb)
-{
-	struct stub_priv *priv = (struct stub_priv *) urb->context;
-	struct stub_device *sdev = priv->sdev;
-	struct usb_ctrlrequest *req;
-	__u16 config;
-	int err;
-
-	req = (struct usb_ctrlrequest *) urb->setup_packet;
-	config = le16_to_cpu(req->wValue);
-
-	err = usb_set_configuration(sdev->udev, config);
-	if (err && err != -ENODEV)
-		dev_err(&sdev->udev->dev, "can't set config #%d, error %d\n",
-			config, err);
-	return 0;
-}
-
-static int tweak_reset_device_cmd(struct urb *urb)
-{
-	struct stub_priv *priv = (struct stub_priv *) urb->context;
-	struct stub_device *sdev = priv->sdev;
-
-	dev_info(&urb->dev->dev, "usb_queue_reset_device\n");
-
-	/*
-	 * With the implementation of pre_reset and post_reset the driver no
-	 * longer unbinds. This allows the use of synchronous reset.
-	 */
-
-	if (usb_lock_device_for_reset(sdev->udev, sdev->interface) < 0) {
-		dev_err(&urb->dev->dev, "could not obtain lock to reset device\n");
-		return 0;
-	}
-	usb_reset_device(sdev->udev);
-	usb_unlock_device(sdev->udev);
-
-	return 0;
-}
-
-/*
- * clear_halt, set_interface, and set_configuration require special tricks.
- */
-static void tweak_special_requests(struct urb *urb)
-{
-	if (!urb || !urb->setup_packet)
-		return;
-
-	if (usb_pipetype(urb->pipe) != PIPE_CONTROL)
-		return;
-
-	if (is_clear_halt_cmd(urb))
-		/* tweak clear_halt */
-		 tweak_clear_halt_cmd(urb);
-
-	else if (is_set_interface_cmd(urb))
-		/* tweak set_interface */
-		tweak_set_interface_cmd(urb);
-
-	else if (is_set_configuration_cmd(urb))
-		/* tweak set_configuration */
-		tweak_set_configuration_cmd(urb);
-
-	else if (is_reset_device_cmd(urb))
-		tweak_reset_device_cmd(urb);
-	else
-		usbip_dbg_stub_rx("no need to tweak\n");
-}
-
-/*
- * stub_recv_unlink() unlinks the URB by a call to usb_unlink_urb().
- * By unlinking the urb asynchronously, stub_rx can continuously
- * process coming urbs.  Even if the urb is unlinked, its completion
- * handler will be called and stub_tx will send a return pdu.
- *
- * See also comments about unlinking strategy in vhci_hcd.c.
- */
-static int stub_recv_cmd_unlink(struct stub_device *sdev,
-				struct usbip_header *pdu)
-{
-	int ret;
-	unsigned long flags;
-	struct stub_priv *priv;
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	list_for_each_entry(priv, &sdev->priv_init, list) {
-		if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
-			continue;
-
-		dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
-			 priv->urb);
-
-		/*
-		 * This matched urb is not completed yet (i.e., be in
-		 * flight in usb hcd hardware/driver). Now we are
-		 * cancelling it. The unlinking flag means that we are
-		 * now not going to return the normal result pdu of a
-		 * submission request, but going to return a result pdu
-		 * of the unlink request.
-		 */
-		priv->unlinking = 1;
-
-		/*
-		 * In the case that unlinking flag is on, prev->seqnum
-		 * is changed from the seqnum of the cancelling urb to
-		 * the seqnum of the unlink request. This will be used
-		 * to make the result pdu of the unlink request.
-		 */
-		priv->seqnum = pdu->base.seqnum;
-
-		spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-		/*
-		 * usb_unlink_urb() is now out of spinlocking to avoid
-		 * spinlock recursion since stub_complete() is
-		 * sometimes called in this context but not in the
-		 * interrupt context.  If stub_complete() is executed
-		 * before we call usb_unlink_urb(), usb_unlink_urb()
-		 * will return an error value. In this case, stub_tx
-		 * will return the result pdu of this unlink request
-		 * though submission is completed and actual unlinking
-		 * is not executed. OK?
-		 */
-		/* In the above case, urb->status is not -ECONNRESET,
-		 * so a driver in a client host will know the failure
-		 * of the unlink request ?
-		 */
-		ret = usb_unlink_urb(priv->urb);
-		if (ret != -EINPROGRESS)
-			dev_err(&priv->urb->dev->dev,
-				"failed to unlink a urb %p, ret %d\n",
-				priv->urb, ret);
-
-		return 0;
-	}
-
-	usbip_dbg_stub_rx("seqnum %d is not pending\n",
-			  pdu->u.cmd_unlink.seqnum);
-
-	/*
-	 * The urb of the unlink target is not found in priv_init queue. It was
-	 * already completed and its results is/was going to be sent by a
-	 * CMD_RET pdu. In this case, usb_unlink_urb() is not needed. We only
-	 * return the completeness of this unlink request to vhci_hcd.
-	 */
-	stub_enqueue_ret_unlink(sdev, pdu->base.seqnum, 0);
-
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return 0;
-}
-
-static int valid_request(struct stub_device *sdev, struct usbip_header *pdu)
-{
-	struct usbip_device *ud = &sdev->ud;
-	int valid = 0;
-
-	if (pdu->base.devid == sdev->devid) {
-		spin_lock_irq(&ud->lock);
-		if (ud->status == SDEV_ST_USED) {
-			/* A request is valid. */
-			valid = 1;
-		}
-		spin_unlock_irq(&ud->lock);
-	}
-
-	return valid;
-}
-
-static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
-					 struct usbip_header *pdu)
-{
-	struct stub_priv *priv;
-	struct usbip_device *ud = &sdev->ud;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	priv = kmem_cache_zalloc(stub_priv_cache, GFP_ATOMIC);
-	if (!priv) {
-		dev_err(&sdev->interface->dev, "alloc stub_priv\n");
-		spin_unlock_irqrestore(&sdev->priv_lock, flags);
-		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
-		return NULL;
-	}
-
-	priv->seqnum = pdu->base.seqnum;
-	priv->sdev = sdev;
-
-	/*
-	 * After a stub_priv is linked to a list_head,
-	 * our error handler can free allocated data.
-	 */
-	list_add_tail(&priv->list, &sdev->priv_init);
-
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return priv;
-}
-
-static int get_pipe(struct stub_device *sdev, int epnum, int dir)
-{
-	struct usb_device *udev = sdev->udev;
-	struct usb_host_endpoint *ep;
-	struct usb_endpoint_descriptor *epd = NULL;
-
-	if (dir == USBIP_DIR_IN)
-		ep = udev->ep_in[epnum & 0x7f];
-	else
-		ep = udev->ep_out[epnum & 0x7f];
-	if (!ep) {
-		dev_err(&sdev->interface->dev, "no such endpoint?, %d\n",
-			epnum);
-		BUG();
-	}
-
-	epd = &ep->desc;
-	if (usb_endpoint_xfer_control(epd)) {
-		if (dir == USBIP_DIR_OUT)
-			return usb_sndctrlpipe(udev, epnum);
-		else
-			return usb_rcvctrlpipe(udev, epnum);
-	}
-
-	if (usb_endpoint_xfer_bulk(epd)) {
-		if (dir == USBIP_DIR_OUT)
-			return usb_sndbulkpipe(udev, epnum);
-		else
-			return usb_rcvbulkpipe(udev, epnum);
-	}
-
-	if (usb_endpoint_xfer_int(epd)) {
-		if (dir == USBIP_DIR_OUT)
-			return usb_sndintpipe(udev, epnum);
-		else
-			return usb_rcvintpipe(udev, epnum);
-	}
-
-	if (usb_endpoint_xfer_isoc(epd)) {
-		if (dir == USBIP_DIR_OUT)
-			return usb_sndisocpipe(udev, epnum);
-		else
-			return usb_rcvisocpipe(udev, epnum);
-	}
-
-	/* NOT REACHED */
-	dev_err(&sdev->interface->dev, "get pipe, epnum %d\n", epnum);
-	return 0;
-}
-
-static void masking_bogus_flags(struct urb *urb)
-{
-	int				xfertype;
-	struct usb_device		*dev;
-	struct usb_host_endpoint	*ep;
-	int				is_out;
-	unsigned int	allowed;
-
-	if (!urb || urb->hcpriv || !urb->complete)
-		return;
-	dev = urb->dev;
-	if ((!dev) || (dev->state < USB_STATE_UNAUTHENTICATED))
-		return;
-
-	ep = (usb_pipein(urb->pipe) ? dev->ep_in : dev->ep_out)
-		[usb_pipeendpoint(urb->pipe)];
-	if (!ep)
-		return;
-
-	xfertype = usb_endpoint_type(&ep->desc);
-	if (xfertype == USB_ENDPOINT_XFER_CONTROL) {
-		struct usb_ctrlrequest *setup =
-			(struct usb_ctrlrequest *) urb->setup_packet;
-
-		if (!setup)
-			return;
-		is_out = !(setup->bRequestType & USB_DIR_IN) ||
-			!setup->wLength;
-	} else {
-		is_out = usb_endpoint_dir_out(&ep->desc);
-	}
-
-	/* enforce simple/standard policy */
-	allowed = (URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT |
-		   URB_DIR_MASK | URB_FREE_BUFFER);
-	switch (xfertype) {
-	case USB_ENDPOINT_XFER_BULK:
-		if (is_out)
-			allowed |= URB_ZERO_PACKET;
-		/* FALLTHROUGH */
-	case USB_ENDPOINT_XFER_CONTROL:
-		allowed |= URB_NO_FSBR;	/* only affects UHCI */
-		/* FALLTHROUGH */
-	default:			/* all non-iso endpoints */
-		if (!is_out)
-			allowed |= URB_SHORT_NOT_OK;
-		break;
-	case USB_ENDPOINT_XFER_ISOC:
-		allowed |= URB_ISO_ASAP;
-		break;
-	}
-	urb->transfer_flags &= allowed;
-}
-
-static void stub_recv_cmd_submit(struct stub_device *sdev,
-				 struct usbip_header *pdu)
-{
-	int ret;
-	struct stub_priv *priv;
-	struct usbip_device *ud = &sdev->ud;
-	struct usb_device *udev = sdev->udev;
-	int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
-
-	priv = stub_priv_alloc(sdev, pdu);
-	if (!priv)
-		return;
-
-	/* setup a urb */
-	if (usb_pipeisoc(pipe))
-		priv->urb = usb_alloc_urb(pdu->u.cmd_submit.number_of_packets,
-					  GFP_KERNEL);
-	else
-		priv->urb = usb_alloc_urb(0, GFP_KERNEL);
-
-	if (!priv->urb) {
-		dev_err(&sdev->interface->dev, "malloc urb\n");
-		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
-		return;
-	}
-
-	/* allocate urb transfer buffer, if needed */
-	if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
-		priv->urb->transfer_buffer =
-			kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
-				GFP_KERNEL);
-		if (!priv->urb->transfer_buffer) {
-			usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
-			return;
-		}
-	}
-
-	/* copy urb setup packet */
-	priv->urb->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, 8,
-					  GFP_KERNEL);
-	if (!priv->urb->setup_packet) {
-		dev_err(&sdev->interface->dev, "allocate setup_packet\n");
-		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
-		return;
-	}
-
-	/* set other members from the base header of pdu */
-	priv->urb->context                = (void *) priv;
-	priv->urb->dev                    = udev;
-	priv->urb->pipe                   = pipe;
-	priv->urb->complete               = stub_complete;
-
-	usbip_pack_pdu(pdu, priv->urb, USBIP_CMD_SUBMIT, 0);
-
-
-	if (usbip_recv_xbuff(ud, priv->urb) < 0)
-		return;
-
-	if (usbip_recv_iso(ud, priv->urb) < 0)
-		return;
-
-	/* no need to submit an intercepted request, but harmless? */
-	tweak_special_requests(priv->urb);
-
-	masking_bogus_flags(priv->urb);
-	/* urb is now ready to submit */
-	ret = usb_submit_urb(priv->urb, GFP_KERNEL);
-
-	if (ret == 0)
-		usbip_dbg_stub_rx("submit urb ok, seqnum %u\n",
-				  pdu->base.seqnum);
-	else {
-		dev_err(&sdev->interface->dev, "submit_urb error, %d\n", ret);
-		usbip_dump_header(pdu);
-		usbip_dump_urb(priv->urb);
-
-		/*
-		 * Pessimistic.
-		 * This connection will be discarded.
-		 */
-		usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT);
-	}
-
-	usbip_dbg_stub_rx("Leave\n");
-}
-
-/* recv a pdu */
-static void stub_rx_pdu(struct usbip_device *ud)
-{
-	int ret;
-	struct usbip_header pdu;
-	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
-	struct device *dev = &sdev->udev->dev;
-
-	usbip_dbg_stub_rx("Enter\n");
-
-	memset(&pdu, 0, sizeof(pdu));
-
-	/* receive a pdu header */
-	ret = usbip_recv(ud->tcp_socket, &pdu, sizeof(pdu));
-	if (ret != sizeof(pdu)) {
-		dev_err(dev, "recv a header, %d\n", ret);
-		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		return;
-	}
-
-	usbip_header_correct_endian(&pdu, 0);
-
-	if (usbip_dbg_flag_stub_rx)
-		usbip_dump_header(&pdu);
-
-	if (!valid_request(sdev, &pdu)) {
-		dev_err(dev, "recv invalid request\n");
-		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		return;
-	}
-
-	switch (pdu.base.command) {
-	case USBIP_CMD_UNLINK:
-		stub_recv_cmd_unlink(sdev, &pdu);
-		break;
-
-	case USBIP_CMD_SUBMIT:
-		stub_recv_cmd_submit(sdev, &pdu);
-		break;
-
-	default:
-		/* NOTREACHED */
-		dev_err(dev, "unknown pdu\n");
-		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		break;
-	}
-}
-
-int stub_rx_loop(void *data)
-{
-	struct usbip_device *ud = data;
-
-	while (!kthread_should_stop()) {
-		if (usbip_event_happened(ud))
-			break;
-
-		stub_rx_pdu(ud);
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/usbip/stub_tx.c b/drivers/staging/usbip/stub_tx.c
deleted file mode 100644
index dbcabc9dbe0d..000000000000
--- a/drivers/staging/usbip/stub_tx.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/socket.h>
-
-#include "usbip_common.h"
-#include "stub.h"
-
-static void stub_free_priv_and_urb(struct stub_priv *priv)
-{
-	struct urb *urb = priv->urb;
-
-	kfree(urb->setup_packet);
-	kfree(urb->transfer_buffer);
-	list_del(&priv->list);
-	kmem_cache_free(stub_priv_cache, priv);
-	usb_free_urb(urb);
-}
-
-/* be in spin_lock_irqsave(&sdev->priv_lock, flags) */
-void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum,
-			     __u32 status)
-{
-	struct stub_unlink *unlink;
-
-	unlink = kzalloc(sizeof(struct stub_unlink), GFP_ATOMIC);
-	if (!unlink) {
-		usbip_event_add(&sdev->ud, VDEV_EVENT_ERROR_MALLOC);
-		return;
-	}
-
-	unlink->seqnum = seqnum;
-	unlink->status = status;
-
-	list_add_tail(&unlink->list, &sdev->unlink_tx);
-}
-
-/**
- * stub_complete - completion handler of a usbip urb
- * @urb: pointer to the urb completed
- *
- * When a urb has completed, the USB core driver calls this function mostly in
- * the interrupt context. To return the result of a urb, the completed urb is
- * linked to the pending list of returning.
- *
- */
-void stub_complete(struct urb *urb)
-{
-	struct stub_priv *priv = (struct stub_priv *) urb->context;
-	struct stub_device *sdev = priv->sdev;
-	unsigned long flags;
-
-	usbip_dbg_stub_tx("complete! status %d\n", urb->status);
-
-	switch (urb->status) {
-	case 0:
-		/* OK */
-		break;
-	case -ENOENT:
-		dev_info(&urb->dev->dev,
-			 "stopped by a call to usb_kill_urb() because of cleaning up a virtual connection\n");
-		return;
-	case -ECONNRESET:
-		dev_info(&urb->dev->dev,
-			 "unlinked by a call to usb_unlink_urb()\n");
-		break;
-	case -EPIPE:
-		dev_info(&urb->dev->dev, "endpoint %d is stalled\n",
-			 usb_pipeendpoint(urb->pipe));
-		break;
-	case -ESHUTDOWN:
-		dev_info(&urb->dev->dev, "device removed?\n");
-		break;
-	default:
-		dev_info(&urb->dev->dev,
-			 "urb completion with non-zero status %d\n",
-			 urb->status);
-		break;
-	}
-
-	/* link a urb to the queue of tx. */
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-	if (priv->unlinking) {
-		stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
-		stub_free_priv_and_urb(priv);
-	} else {
-		list_move_tail(&priv->list, &sdev->priv_tx);
-	}
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	/* wake up tx_thread */
-	wake_up(&sdev->tx_waitq);
-}
-
-static inline void setup_base_pdu(struct usbip_header_basic *base,
-				  __u32 command, __u32 seqnum)
-{
-	base->command	= command;
-	base->seqnum	= seqnum;
-	base->devid	= 0;
-	base->ep	= 0;
-	base->direction = 0;
-}
-
-static void setup_ret_submit_pdu(struct usbip_header *rpdu, struct urb *urb)
-{
-	struct stub_priv *priv = (struct stub_priv *) urb->context;
-
-	setup_base_pdu(&rpdu->base, USBIP_RET_SUBMIT, priv->seqnum);
-	usbip_pack_pdu(rpdu, urb, USBIP_RET_SUBMIT, 1);
-}
-
-static void setup_ret_unlink_pdu(struct usbip_header *rpdu,
-				 struct stub_unlink *unlink)
-{
-	setup_base_pdu(&rpdu->base, USBIP_RET_UNLINK, unlink->seqnum);
-	rpdu->u.ret_unlink.status = unlink->status;
-}
-
-static struct stub_priv *dequeue_from_priv_tx(struct stub_device *sdev)
-{
-	unsigned long flags;
-	struct stub_priv *priv, *tmp;
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	list_for_each_entry_safe(priv, tmp, &sdev->priv_tx, list) {
-		list_move_tail(&priv->list, &sdev->priv_free);
-		spin_unlock_irqrestore(&sdev->priv_lock, flags);
-		return priv;
-	}
-
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return NULL;
-}
-
-static int stub_send_ret_submit(struct stub_device *sdev)
-{
-	unsigned long flags;
-	struct stub_priv *priv, *tmp;
-
-	struct msghdr msg;
-	size_t txsize;
-
-	size_t total_size = 0;
-
-	while ((priv = dequeue_from_priv_tx(sdev)) != NULL) {
-		int ret;
-		struct urb *urb = priv->urb;
-		struct usbip_header pdu_header;
-		struct usbip_iso_packet_descriptor *iso_buffer = NULL;
-		struct kvec *iov = NULL;
-		int iovnum = 0;
-
-		txsize = 0;
-		memset(&pdu_header, 0, sizeof(pdu_header));
-		memset(&msg, 0, sizeof(msg));
-
-		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
-			iovnum = 2 + urb->number_of_packets;
-		else
-			iovnum = 2;
-
-		iov = kcalloc(iovnum, sizeof(struct kvec), GFP_KERNEL);
-
-		if (!iov) {
-			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_MALLOC);
-			return -1;
-		}
-
-		iovnum = 0;
-
-		/* 1. setup usbip_header */
-		setup_ret_submit_pdu(&pdu_header, urb);
-		usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-				  pdu_header.base.seqnum, urb);
-		usbip_header_correct_endian(&pdu_header, 1);
-
-		iov[iovnum].iov_base = &pdu_header;
-		iov[iovnum].iov_len  = sizeof(pdu_header);
-		iovnum++;
-		txsize += sizeof(pdu_header);
-
-		/* 2. setup transfer buffer */
-		if (usb_pipein(urb->pipe) &&
-		    usb_pipetype(urb->pipe) != PIPE_ISOCHRONOUS &&
-		    urb->actual_length > 0) {
-			iov[iovnum].iov_base = urb->transfer_buffer;
-			iov[iovnum].iov_len  = urb->actual_length;
-			iovnum++;
-			txsize += urb->actual_length;
-		} else if (usb_pipein(urb->pipe) &&
-			   usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
-			/*
-			 * For isochronous packets: actual length is the sum of
-			 * the actual length of the individual, packets, but as
-			 * the packet offsets are not changed there will be
-			 * padding between the packets. To optimally use the
-			 * bandwidth the padding is not transmitted.
-			 */
-
-			int i;
-
-			for (i = 0; i < urb->number_of_packets; i++) {
-				iov[iovnum].iov_base = urb->transfer_buffer +
-					urb->iso_frame_desc[i].offset;
-				iov[iovnum].iov_len =
-					urb->iso_frame_desc[i].actual_length;
-				iovnum++;
-				txsize += urb->iso_frame_desc[i].actual_length;
-			}
-
-			if (txsize != sizeof(pdu_header) + urb->actual_length) {
-				dev_err(&sdev->interface->dev,
-					"actual length of urb %d does not match iso packet sizes %zu\n",
-					urb->actual_length,
-					txsize-sizeof(pdu_header));
-				kfree(iov);
-				usbip_event_add(&sdev->ud,
-						SDEV_EVENT_ERROR_TCP);
-			   return -1;
-			}
-		}
-
-		/* 3. setup iso_packet_descriptor */
-		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
-			ssize_t len = 0;
-
-			iso_buffer = usbip_alloc_iso_desc_pdu(urb, &len);
-			if (!iso_buffer) {
-				usbip_event_add(&sdev->ud,
-						SDEV_EVENT_ERROR_MALLOC);
-				kfree(iov);
-				return -1;
-			}
-
-			iov[iovnum].iov_base = iso_buffer;
-			iov[iovnum].iov_len  = len;
-			txsize += len;
-			iovnum++;
-		}
-
-		ret = kernel_sendmsg(sdev->ud.tcp_socket, &msg,
-						iov,  iovnum, txsize);
-		if (ret != txsize) {
-			dev_err(&sdev->interface->dev,
-				"sendmsg failed!, retval %d for %zd\n",
-				ret, txsize);
-			kfree(iov);
-			kfree(iso_buffer);
-			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_TCP);
-			return -1;
-		}
-
-		kfree(iov);
-		kfree(iso_buffer);
-
-		total_size += txsize;
-	}
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-	list_for_each_entry_safe(priv, tmp, &sdev->priv_free, list) {
-		stub_free_priv_and_urb(priv);
-	}
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return total_size;
-}
-
-static struct stub_unlink *dequeue_from_unlink_tx(struct stub_device *sdev)
-{
-	unsigned long flags;
-	struct stub_unlink *unlink, *tmp;
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	list_for_each_entry_safe(unlink, tmp, &sdev->unlink_tx, list) {
-		list_move_tail(&unlink->list, &sdev->unlink_free);
-		spin_unlock_irqrestore(&sdev->priv_lock, flags);
-		return unlink;
-	}
-
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return NULL;
-}
-
-static int stub_send_ret_unlink(struct stub_device *sdev)
-{
-	unsigned long flags;
-	struct stub_unlink *unlink, *tmp;
-
-	struct msghdr msg;
-	struct kvec iov[1];
-	size_t txsize;
-
-	size_t total_size = 0;
-
-	while ((unlink = dequeue_from_unlink_tx(sdev)) != NULL) {
-		int ret;
-		struct usbip_header pdu_header;
-
-		txsize = 0;
-		memset(&pdu_header, 0, sizeof(pdu_header));
-		memset(&msg, 0, sizeof(msg));
-		memset(&iov, 0, sizeof(iov));
-
-		usbip_dbg_stub_tx("setup ret unlink %lu\n", unlink->seqnum);
-
-		/* 1. setup usbip_header */
-		setup_ret_unlink_pdu(&pdu_header, unlink);
-		usbip_header_correct_endian(&pdu_header, 1);
-
-		iov[0].iov_base = &pdu_header;
-		iov[0].iov_len  = sizeof(pdu_header);
-		txsize += sizeof(pdu_header);
-
-		ret = kernel_sendmsg(sdev->ud.tcp_socket, &msg, iov,
-				     1, txsize);
-		if (ret != txsize) {
-			dev_err(&sdev->interface->dev,
-				"sendmsg failed!, retval %d for %zd\n",
-				ret, txsize);
-			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_TCP);
-			return -1;
-		}
-
-		usbip_dbg_stub_tx("send txdata\n");
-		total_size += txsize;
-	}
-
-	spin_lock_irqsave(&sdev->priv_lock, flags);
-
-	list_for_each_entry_safe(unlink, tmp, &sdev->unlink_free, list) {
-		list_del(&unlink->list);
-		kfree(unlink);
-	}
-
-	spin_unlock_irqrestore(&sdev->priv_lock, flags);
-
-	return total_size;
-}
-
-int stub_tx_loop(void *data)
-{
-	struct usbip_device *ud = data;
-	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
-
-	while (!kthread_should_stop()) {
-		if (usbip_event_happened(ud))
-			break;
-
-		/*
-		 * send_ret_submit comes earlier than send_ret_unlink.  stub_rx
-		 * looks at only priv_init queue. If the completion of a URB is
-		 * earlier than the receive of CMD_UNLINK, priv is moved to
-		 * priv_tx queue and stub_rx does not find the target priv. In
-		 * this case, vhci_rx receives the result of the submit request
-		 * and then receives the result of the unlink request. The
-		 * result of the submit is given back to the usbcore as the
-		 * completion of the unlink request. The request of the
-		 * unlink is ignored. This is ok because a driver who calls
-		 * usb_unlink_urb() understands the unlink was too late by
-		 * getting the status of the given-backed URB which has the
-		 * status of usb_submit_urb().
-		 */
-		if (stub_send_ret_submit(sdev) < 0)
-			break;
-
-		if (stub_send_ret_unlink(sdev) < 0)
-			break;
-
-		wait_event_interruptible(sdev->tx_waitq,
-					 (!list_empty(&sdev->priv_tx) ||
-					  !list_empty(&sdev->unlink_tx) ||
-					  kthread_should_stop()));
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/usbip/uapi/usbip.h b/drivers/staging/usbip/uapi/usbip.h
deleted file mode 100644
index fa5db30ede36..000000000000
--- a/drivers/staging/usbip/uapi/usbip.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- *	usbip.h
- *
- *	USBIP uapi defines and function prototypes etc.
-*/
-
-#ifndef _UAPI_LINUX_USBIP_H
-#define _UAPI_LINUX_USBIP_H
-
-/* usbip device status - exported in usbip device sysfs status */
-enum usbip_device_status {
-	/* sdev is available. */
-	SDEV_ST_AVAILABLE = 0x01,
-	/* sdev is now used. */
-	SDEV_ST_USED,
-	/* sdev is unusable because of a fatal error. */
-	SDEV_ST_ERROR,
-
-	/* vdev does not connect a remote device. */
-	VDEV_ST_NULL,
-	/* vdev is used, but the USB address is not assigned yet */
-	VDEV_ST_NOTASSIGNED,
-	VDEV_ST_USED,
-	VDEV_ST_ERROR
-};
-#endif /* _UAPI_LINUX_USBIP_H */
diff --git a/drivers/staging/usbip/usbip_common.c b/drivers/staging/usbip/usbip_common.c
deleted file mode 100644
index facaaf003f19..000000000000
--- a/drivers/staging/usbip/usbip_common.c
+++ /dev/null
@@ -1,776 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <asm/byteorder.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <net/sock.h>
-
-#include "usbip_common.h"
-
-#define DRIVER_AUTHOR "Takahiro Hirofuchi <hirofuchi@users.sourceforge.net>"
-#define DRIVER_DESC "USB/IP Core"
-
-#ifdef CONFIG_USBIP_DEBUG
-unsigned long usbip_debug_flag = 0xffffffff;
-#else
-unsigned long usbip_debug_flag;
-#endif
-EXPORT_SYMBOL_GPL(usbip_debug_flag);
-module_param(usbip_debug_flag, ulong, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(usbip_debug_flag, "debug flags (defined in usbip_common.h)");
-
-/* FIXME */
-struct device_attribute dev_attr_usbip_debug;
-EXPORT_SYMBOL_GPL(dev_attr_usbip_debug);
-
-static ssize_t usbip_debug_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lx\n", usbip_debug_flag);
-}
-
-static ssize_t usbip_debug_store(struct device *dev,
-				 struct device_attribute *attr, const char *buf,
-				 size_t count)
-{
-	if (sscanf(buf, "%lx", &usbip_debug_flag) != 1)
-		return -EINVAL;
-	return count;
-}
-DEVICE_ATTR_RW(usbip_debug);
-
-static void usbip_dump_buffer(char *buff, int bufflen)
-{
-	print_hex_dump(KERN_DEBUG, "usbip-core", DUMP_PREFIX_OFFSET, 16, 4,
-		       buff, bufflen, false);
-}
-
-static void usbip_dump_pipe(unsigned int p)
-{
-	unsigned char type = usb_pipetype(p);
-	unsigned char ep   = usb_pipeendpoint(p);
-	unsigned char dev  = usb_pipedevice(p);
-	unsigned char dir  = usb_pipein(p);
-
-	pr_debug("dev(%d) ep(%d) [%s] ", dev, ep, dir ? "IN" : "OUT");
-
-	switch (type) {
-	case PIPE_ISOCHRONOUS:
-		pr_debug("ISO\n");
-		break;
-	case PIPE_INTERRUPT:
-		pr_debug("INT\n");
-		break;
-	case PIPE_CONTROL:
-		pr_debug("CTRL\n");
-		break;
-	case PIPE_BULK:
-		pr_debug("BULK\n");
-		break;
-	default:
-		pr_debug("ERR\n");
-		break;
-	}
-}
-
-static void usbip_dump_usb_device(struct usb_device *udev)
-{
-	struct device *dev = &udev->dev;
-	int i;
-
-	dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
-		udev->devnum, udev->devpath, usb_speed_string(udev->speed));
-
-	pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
-
-	dev_dbg(dev, "                    ");
-	for (i = 0; i < 16; i++)
-		pr_debug(" %2u", i);
-	pr_debug("\n");
-
-	dev_dbg(dev, "       toggle0(IN) :");
-	for (i = 0; i < 16; i++)
-		pr_debug(" %2u", (udev->toggle[0] & (1 << i)) ? 1 : 0);
-	pr_debug("\n");
-
-	dev_dbg(dev, "       toggle1(OUT):");
-	for (i = 0; i < 16; i++)
-		pr_debug(" %2u", (udev->toggle[1] & (1 << i)) ? 1 : 0);
-	pr_debug("\n");
-
-	dev_dbg(dev, "       epmaxp_in   :");
-	for (i = 0; i < 16; i++) {
-		if (udev->ep_in[i])
-			pr_debug(" %2u",
-			    le16_to_cpu(udev->ep_in[i]->desc.wMaxPacketSize));
-	}
-	pr_debug("\n");
-
-	dev_dbg(dev, "       epmaxp_out  :");
-	for (i = 0; i < 16; i++) {
-		if (udev->ep_out[i])
-			pr_debug(" %2u",
-			    le16_to_cpu(udev->ep_out[i]->desc.wMaxPacketSize));
-	}
-	pr_debug("\n");
-
-	dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
-	dev_dbg(dev,
-		"descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
-		&udev->descriptor, udev->config,
-		udev->actconfig, udev->rawdescriptors);
-
-	dev_dbg(dev, "have_langid %d, string_langid %d\n",
-		udev->have_langid, udev->string_langid);
-
-	dev_dbg(dev, "maxchild %d\n", udev->maxchild);
-}
-
-static void usbip_dump_request_type(__u8 rt)
-{
-	switch (rt & USB_RECIP_MASK) {
-	case USB_RECIP_DEVICE:
-		pr_debug("DEVICE");
-		break;
-	case USB_RECIP_INTERFACE:
-		pr_debug("INTERF");
-		break;
-	case USB_RECIP_ENDPOINT:
-		pr_debug("ENDPOI");
-		break;
-	case USB_RECIP_OTHER:
-		pr_debug("OTHER ");
-		break;
-	default:
-		pr_debug("------");
-		break;
-	}
-}
-
-static void usbip_dump_usb_ctrlrequest(struct usb_ctrlrequest *cmd)
-{
-	if (!cmd) {
-		pr_debug("       : null pointer\n");
-		return;
-	}
-
-	pr_debug("       ");
-	pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) wLength(%04X) ",
-		 cmd->bRequestType, cmd->bRequest,
-		 cmd->wValue, cmd->wIndex, cmd->wLength);
-	pr_debug("\n       ");
-
-	if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD) {
-		pr_debug("STANDARD ");
-		switch (cmd->bRequest) {
-		case USB_REQ_GET_STATUS:
-			pr_debug("GET_STATUS\n");
-			break;
-		case USB_REQ_CLEAR_FEATURE:
-			pr_debug("CLEAR_FEAT\n");
-			break;
-		case USB_REQ_SET_FEATURE:
-			pr_debug("SET_FEAT\n");
-			break;
-		case USB_REQ_SET_ADDRESS:
-			pr_debug("SET_ADDRRS\n");
-			break;
-		case USB_REQ_GET_DESCRIPTOR:
-			pr_debug("GET_DESCRI\n");
-			break;
-		case USB_REQ_SET_DESCRIPTOR:
-			pr_debug("SET_DESCRI\n");
-			break;
-		case USB_REQ_GET_CONFIGURATION:
-			pr_debug("GET_CONFIG\n");
-			break;
-		case USB_REQ_SET_CONFIGURATION:
-			pr_debug("SET_CONFIG\n");
-			break;
-		case USB_REQ_GET_INTERFACE:
-			pr_debug("GET_INTERF\n");
-			break;
-		case USB_REQ_SET_INTERFACE:
-			pr_debug("SET_INTERF\n");
-			break;
-		case USB_REQ_SYNCH_FRAME:
-			pr_debug("SYNC_FRAME\n");
-			break;
-		default:
-			pr_debug("REQ(%02X)\n", cmd->bRequest);
-			break;
-		}
-		usbip_dump_request_type(cmd->bRequestType);
-	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) {
-		pr_debug("CLASS\n");
-	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) {
-		pr_debug("VENDOR\n");
-	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_RESERVED) {
-		pr_debug("RESERVED\n");
-	}
-}
-
-void usbip_dump_urb(struct urb *urb)
-{
-	struct device *dev;
-
-	if (!urb) {
-		pr_debug("urb: null pointer!!\n");
-		return;
-	}
-
-	if (!urb->dev) {
-		pr_debug("urb->dev: null pointer!!\n");
-		return;
-	}
-
-	dev = &urb->dev->dev;
-
-	dev_dbg(dev, "   urb                   :%p\n", urb);
-	dev_dbg(dev, "   dev                   :%p\n", urb->dev);
-
-	usbip_dump_usb_device(urb->dev);
-
-	dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
-
-	usbip_dump_pipe(urb->pipe);
-
-	dev_dbg(dev, "   status                :%d\n", urb->status);
-	dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
-	dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
-	dev_dbg(dev, "   transfer_buffer_length:%d\n",
-						urb->transfer_buffer_length);
-	dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
-	dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
-
-	if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
-		usbip_dump_usb_ctrlrequest(
-			(struct usb_ctrlrequest *)urb->setup_packet);
-
-	dev_dbg(dev, "   start_frame           :%d\n", urb->start_frame);
-	dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
-	dev_dbg(dev, "   interval              :%d\n", urb->interval);
-	dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
-	dev_dbg(dev, "   context               :%p\n", urb->context);
-	dev_dbg(dev, "   complete              :%p\n", urb->complete);
-}
-EXPORT_SYMBOL_GPL(usbip_dump_urb);
-
-void usbip_dump_header(struct usbip_header *pdu)
-{
-	pr_debug("BASE: cmd %u seq %u devid %u dir %u ep %u\n",
-		 pdu->base.command,
-		 pdu->base.seqnum,
-		 pdu->base.devid,
-		 pdu->base.direction,
-		 pdu->base.ep);
-
-	switch (pdu->base.command) {
-	case USBIP_CMD_SUBMIT:
-		pr_debug("USBIP_CMD_SUBMIT: x_flags %u x_len %u sf %u #p %d iv %d\n",
-			 pdu->u.cmd_submit.transfer_flags,
-			 pdu->u.cmd_submit.transfer_buffer_length,
-			 pdu->u.cmd_submit.start_frame,
-			 pdu->u.cmd_submit.number_of_packets,
-			 pdu->u.cmd_submit.interval);
-		break;
-	case USBIP_CMD_UNLINK:
-		pr_debug("USBIP_CMD_UNLINK: seq %u\n",
-			 pdu->u.cmd_unlink.seqnum);
-		break;
-	case USBIP_RET_SUBMIT:
-		pr_debug("USBIP_RET_SUBMIT: st %d al %u sf %d #p %d ec %d\n",
-			 pdu->u.ret_submit.status,
-			 pdu->u.ret_submit.actual_length,
-			 pdu->u.ret_submit.start_frame,
-			 pdu->u.ret_submit.number_of_packets,
-			 pdu->u.ret_submit.error_count);
-		break;
-	case USBIP_RET_UNLINK:
-		pr_debug("USBIP_RET_UNLINK: status %d\n",
-			 pdu->u.ret_unlink.status);
-		break;
-	default:
-		/* NOT REACHED */
-		pr_err("unknown command\n");
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(usbip_dump_header);
-
-/* Receive data over TCP/IP. */
-int usbip_recv(struct socket *sock, void *buf, int size)
-{
-	int result;
-	struct msghdr msg;
-	struct kvec iov;
-	int total = 0;
-
-	/* for blocks of if (usbip_dbg_flag_xmit) */
-	char *bp = buf;
-	int osize = size;
-
-	usbip_dbg_xmit("enter\n");
-
-	if (!sock || !buf || !size) {
-		pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
-		       size);
-		return -EINVAL;
-	}
-
-	do {
-		sock->sk->sk_allocation = GFP_NOIO;
-		iov.iov_base    = buf;
-		iov.iov_len     = size;
-		msg.msg_name    = NULL;
-		msg.msg_namelen = 0;
-		msg.msg_control = NULL;
-		msg.msg_controllen = 0;
-		msg.msg_flags      = MSG_NOSIGNAL;
-
-		result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL);
-		if (result <= 0) {
-			pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
-				 sock, buf, size, result, total);
-			goto err;
-		}
-
-		size -= result;
-		buf += result;
-		total += result;
-	} while (size > 0);
-
-	if (usbip_dbg_flag_xmit) {
-		if (!in_interrupt())
-			pr_debug("%-10s:", current->comm);
-		else
-			pr_debug("interrupt  :");
-
-		pr_debug("receiving....\n");
-		usbip_dump_buffer(bp, osize);
-		pr_debug("received, osize %d ret %d size %d total %d\n",
-			 osize, result, size, total);
-	}
-
-	return total;
-
-err:
-	return result;
-}
-EXPORT_SYMBOL_GPL(usbip_recv);
-
-/* there may be more cases to tweak the flags. */
-static unsigned int tweak_transfer_flags(unsigned int flags)
-{
-	flags &= ~URB_NO_TRANSFER_DMA_MAP;
-	return flags;
-}
-
-static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb,
-				  int pack)
-{
-	struct usbip_header_cmd_submit *spdu = &pdu->u.cmd_submit;
-
-	/*
-	 * Some members are not still implemented in usbip. I hope this issue
-	 * will be discussed when usbip is ported to other operating systems.
-	 */
-	if (pack) {
-		spdu->transfer_flags =
-			tweak_transfer_flags(urb->transfer_flags);
-		spdu->transfer_buffer_length	= urb->transfer_buffer_length;
-		spdu->start_frame		= urb->start_frame;
-		spdu->number_of_packets		= urb->number_of_packets;
-		spdu->interval			= urb->interval;
-	} else  {
-		urb->transfer_flags         = spdu->transfer_flags;
-		urb->transfer_buffer_length = spdu->transfer_buffer_length;
-		urb->start_frame            = spdu->start_frame;
-		urb->number_of_packets      = spdu->number_of_packets;
-		urb->interval               = spdu->interval;
-	}
-}
-
-static void usbip_pack_ret_submit(struct usbip_header *pdu, struct urb *urb,
-				  int pack)
-{
-	struct usbip_header_ret_submit *rpdu = &pdu->u.ret_submit;
-
-	if (pack) {
-		rpdu->status		= urb->status;
-		rpdu->actual_length	= urb->actual_length;
-		rpdu->start_frame	= urb->start_frame;
-		rpdu->number_of_packets = urb->number_of_packets;
-		rpdu->error_count	= urb->error_count;
-	} else {
-		urb->status		= rpdu->status;
-		urb->actual_length	= rpdu->actual_length;
-		urb->start_frame	= rpdu->start_frame;
-		urb->number_of_packets = rpdu->number_of_packets;
-		urb->error_count	= rpdu->error_count;
-	}
-}
-
-void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
-		    int pack)
-{
-	switch (cmd) {
-	case USBIP_CMD_SUBMIT:
-		usbip_pack_cmd_submit(pdu, urb, pack);
-		break;
-	case USBIP_RET_SUBMIT:
-		usbip_pack_ret_submit(pdu, urb, pack);
-		break;
-	default:
-		/* NOT REACHED */
-		pr_err("unknown command\n");
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(usbip_pack_pdu);
-
-static void correct_endian_basic(struct usbip_header_basic *base, int send)
-{
-	if (send) {
-		base->command	= cpu_to_be32(base->command);
-		base->seqnum	= cpu_to_be32(base->seqnum);
-		base->devid	= cpu_to_be32(base->devid);
-		base->direction	= cpu_to_be32(base->direction);
-		base->ep	= cpu_to_be32(base->ep);
-	} else {
-		base->command	= be32_to_cpu(base->command);
-		base->seqnum	= be32_to_cpu(base->seqnum);
-		base->devid	= be32_to_cpu(base->devid);
-		base->direction	= be32_to_cpu(base->direction);
-		base->ep	= be32_to_cpu(base->ep);
-	}
-}
-
-static void correct_endian_cmd_submit(struct usbip_header_cmd_submit *pdu,
-				      int send)
-{
-	if (send) {
-		pdu->transfer_flags = cpu_to_be32(pdu->transfer_flags);
-
-		cpu_to_be32s(&pdu->transfer_buffer_length);
-		cpu_to_be32s(&pdu->start_frame);
-		cpu_to_be32s(&pdu->number_of_packets);
-		cpu_to_be32s(&pdu->interval);
-	} else {
-		pdu->transfer_flags = be32_to_cpu(pdu->transfer_flags);
-
-		be32_to_cpus(&pdu->transfer_buffer_length);
-		be32_to_cpus(&pdu->start_frame);
-		be32_to_cpus(&pdu->number_of_packets);
-		be32_to_cpus(&pdu->interval);
-	}
-}
-
-static void correct_endian_ret_submit(struct usbip_header_ret_submit *pdu,
-				      int send)
-{
-	if (send) {
-		cpu_to_be32s(&pdu->status);
-		cpu_to_be32s(&pdu->actual_length);
-		cpu_to_be32s(&pdu->start_frame);
-		cpu_to_be32s(&pdu->number_of_packets);
-		cpu_to_be32s(&pdu->error_count);
-	} else {
-		be32_to_cpus(&pdu->status);
-		be32_to_cpus(&pdu->actual_length);
-		be32_to_cpus(&pdu->start_frame);
-		be32_to_cpus(&pdu->number_of_packets);
-		be32_to_cpus(&pdu->error_count);
-	}
-}
-
-static void correct_endian_cmd_unlink(struct usbip_header_cmd_unlink *pdu,
-				      int send)
-{
-	if (send)
-		pdu->seqnum = cpu_to_be32(pdu->seqnum);
-	else
-		pdu->seqnum = be32_to_cpu(pdu->seqnum);
-}
-
-static void correct_endian_ret_unlink(struct usbip_header_ret_unlink *pdu,
-				      int send)
-{
-	if (send)
-		cpu_to_be32s(&pdu->status);
-	else
-		be32_to_cpus(&pdu->status);
-}
-
-void usbip_header_correct_endian(struct usbip_header *pdu, int send)
-{
-	__u32 cmd = 0;
-
-	if (send)
-		cmd = pdu->base.command;
-
-	correct_endian_basic(&pdu->base, send);
-
-	if (!send)
-		cmd = pdu->base.command;
-
-	switch (cmd) {
-	case USBIP_CMD_SUBMIT:
-		correct_endian_cmd_submit(&pdu->u.cmd_submit, send);
-		break;
-	case USBIP_RET_SUBMIT:
-		correct_endian_ret_submit(&pdu->u.ret_submit, send);
-		break;
-	case USBIP_CMD_UNLINK:
-		correct_endian_cmd_unlink(&pdu->u.cmd_unlink, send);
-		break;
-	case USBIP_RET_UNLINK:
-		correct_endian_ret_unlink(&pdu->u.ret_unlink, send);
-		break;
-	default:
-		/* NOT REACHED */
-		pr_err("unknown command\n");
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(usbip_header_correct_endian);
-
-static void usbip_iso_packet_correct_endian(
-		struct usbip_iso_packet_descriptor *iso, int send)
-{
-	/* does not need all members. but copy all simply. */
-	if (send) {
-		iso->offset	= cpu_to_be32(iso->offset);
-		iso->length	= cpu_to_be32(iso->length);
-		iso->status	= cpu_to_be32(iso->status);
-		iso->actual_length = cpu_to_be32(iso->actual_length);
-	} else {
-		iso->offset	= be32_to_cpu(iso->offset);
-		iso->length	= be32_to_cpu(iso->length);
-		iso->status	= be32_to_cpu(iso->status);
-		iso->actual_length = be32_to_cpu(iso->actual_length);
-	}
-}
-
-static void usbip_pack_iso(struct usbip_iso_packet_descriptor *iso,
-			   struct usb_iso_packet_descriptor *uiso, int pack)
-{
-	if (pack) {
-		iso->offset		= uiso->offset;
-		iso->length		= uiso->length;
-		iso->status		= uiso->status;
-		iso->actual_length	= uiso->actual_length;
-	} else {
-		uiso->offset		= iso->offset;
-		uiso->length		= iso->length;
-		uiso->status		= iso->status;
-		uiso->actual_length	= iso->actual_length;
-	}
-}
-
-/* must free buffer */
-struct usbip_iso_packet_descriptor*
-usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen)
-{
-	struct usbip_iso_packet_descriptor *iso;
-	int np = urb->number_of_packets;
-	ssize_t size = np * sizeof(*iso);
-	int i;
-
-	iso = kzalloc(size, GFP_KERNEL);
-	if (!iso)
-		return NULL;
-
-	for (i = 0; i < np; i++) {
-		usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 1);
-		usbip_iso_packet_correct_endian(&iso[i], 1);
-	}
-
-	*bufflen = size;
-
-	return iso;
-}
-EXPORT_SYMBOL_GPL(usbip_alloc_iso_desc_pdu);
-
-/* some members of urb must be substituted before. */
-int usbip_recv_iso(struct usbip_device *ud, struct urb *urb)
-{
-	void *buff;
-	struct usbip_iso_packet_descriptor *iso;
-	int np = urb->number_of_packets;
-	int size = np * sizeof(*iso);
-	int i;
-	int ret;
-	int total_length = 0;
-
-	if (!usb_pipeisoc(urb->pipe))
-		return 0;
-
-	/* my Bluetooth dongle gets ISO URBs which are np = 0 */
-	if (np == 0)
-		return 0;
-
-	buff = kzalloc(size, GFP_KERNEL);
-	if (!buff)
-		return -ENOMEM;
-
-	ret = usbip_recv(ud->tcp_socket, buff, size);
-	if (ret != size) {
-		dev_err(&urb->dev->dev, "recv iso_frame_descriptor, %d\n",
-			ret);
-		kfree(buff);
-
-		if (ud->side == USBIP_STUB)
-			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		else
-			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-
-		return -EPIPE;
-	}
-
-	iso = (struct usbip_iso_packet_descriptor *) buff;
-	for (i = 0; i < np; i++) {
-		usbip_iso_packet_correct_endian(&iso[i], 0);
-		usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 0);
-		total_length += urb->iso_frame_desc[i].actual_length;
-	}
-
-	kfree(buff);
-
-	if (total_length != urb->actual_length) {
-		dev_err(&urb->dev->dev,
-			"total length of iso packets %d not equal to actual length of buffer %d\n",
-			total_length, urb->actual_length);
-
-		if (ud->side == USBIP_STUB)
-			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		else
-			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-
-		return -EPIPE;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(usbip_recv_iso);
-
-/*
- * This functions restores the padding which was removed for optimizing
- * the bandwidth during transfer over tcp/ip
- *
- * buffer and iso packets need to be stored and be in propeper endian in urb
- * before calling this function
- */
-void usbip_pad_iso(struct usbip_device *ud, struct urb *urb)
-{
-	int np = urb->number_of_packets;
-	int i;
-	int actualoffset = urb->actual_length;
-
-	if (!usb_pipeisoc(urb->pipe))
-		return;
-
-	/* if no packets or length of data is 0, then nothing to unpack */
-	if (np == 0 || urb->actual_length == 0)
-		return;
-
-	/*
-	 * if actual_length is transfer_buffer_length then no padding is
-	 * present.
-	 */
-	if (urb->actual_length == urb->transfer_buffer_length)
-		return;
-
-	/*
-	 * loop over all packets from last to first (to prevent overwritting
-	 * memory when padding) and move them into the proper place
-	 */
-	for (i = np-1; i > 0; i--) {
-		actualoffset -= urb->iso_frame_desc[i].actual_length;
-		memmove(urb->transfer_buffer + urb->iso_frame_desc[i].offset,
-			urb->transfer_buffer + actualoffset,
-			urb->iso_frame_desc[i].actual_length);
-	}
-}
-EXPORT_SYMBOL_GPL(usbip_pad_iso);
-
-/* some members of urb must be substituted before. */
-int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb)
-{
-	int ret;
-	int size;
-
-	if (ud->side == USBIP_STUB) {
-		/* the direction of urb must be OUT. */
-		if (usb_pipein(urb->pipe))
-			return 0;
-
-		size = urb->transfer_buffer_length;
-	} else {
-		/* the direction of urb must be IN. */
-		if (usb_pipeout(urb->pipe))
-			return 0;
-
-		size = urb->actual_length;
-	}
-
-	/* no need to recv xbuff */
-	if (!(size > 0))
-		return 0;
-
-	ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size);
-	if (ret != size) {
-		dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret);
-		if (ud->side == USBIP_STUB) {
-			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
-		} else {
-			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-			return -EPIPE;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(usbip_recv_xbuff);
-
-static int __init usbip_core_init(void)
-{
-	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
-	return 0;
-}
-
-static void __exit usbip_core_exit(void)
-{
-	return;
-}
-
-module_init(usbip_core_init);
-module_exit(usbip_core_exit);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/staging/usbip/usbip_common.h b/drivers/staging/usbip/usbip_common.h
deleted file mode 100644
index 4da3866a037d..000000000000
--- a/drivers/staging/usbip/usbip_common.h
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#ifndef __USBIP_COMMON_H
-#define __USBIP_COMMON_H
-
-#include <linux/compiler.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/net.h>
-#include <linux/printk.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/usb.h>
-#include <linux/wait.h>
-#include "uapi/usbip.h"
-
-#define USBIP_VERSION "1.0.0"
-
-#undef pr_fmt
-
-#ifdef DEBUG
-#define pr_fmt(fmt)     KBUILD_MODNAME ": %s:%d: " fmt, __func__, __LINE__
-#else
-#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
-#endif
-
-enum {
-	usbip_debug_xmit	= (1 << 0),
-	usbip_debug_sysfs	= (1 << 1),
-	usbip_debug_urb		= (1 << 2),
-	usbip_debug_eh		= (1 << 3),
-
-	usbip_debug_stub_cmp	= (1 << 8),
-	usbip_debug_stub_dev	= (1 << 9),
-	usbip_debug_stub_rx	= (1 << 10),
-	usbip_debug_stub_tx	= (1 << 11),
-
-	usbip_debug_vhci_rh	= (1 << 8),
-	usbip_debug_vhci_hc	= (1 << 9),
-	usbip_debug_vhci_rx	= (1 << 10),
-	usbip_debug_vhci_tx	= (1 << 11),
-	usbip_debug_vhci_sysfs  = (1 << 12)
-};
-
-#define usbip_dbg_flag_xmit	(usbip_debug_flag & usbip_debug_xmit)
-#define usbip_dbg_flag_vhci_rh	(usbip_debug_flag & usbip_debug_vhci_rh)
-#define usbip_dbg_flag_vhci_hc	(usbip_debug_flag & usbip_debug_vhci_hc)
-#define usbip_dbg_flag_vhci_rx	(usbip_debug_flag & usbip_debug_vhci_rx)
-#define usbip_dbg_flag_vhci_tx	(usbip_debug_flag & usbip_debug_vhci_tx)
-#define usbip_dbg_flag_stub_rx	(usbip_debug_flag & usbip_debug_stub_rx)
-#define usbip_dbg_flag_stub_tx	(usbip_debug_flag & usbip_debug_stub_tx)
-#define usbip_dbg_flag_vhci_sysfs  (usbip_debug_flag & usbip_debug_vhci_sysfs)
-
-extern unsigned long usbip_debug_flag;
-extern struct device_attribute dev_attr_usbip_debug;
-
-#define usbip_dbg_with_flag(flag, fmt, args...)		\
-	do {						\
-		if (flag & usbip_debug_flag)		\
-			pr_debug(fmt, ##args);		\
-	} while (0)
-
-#define usbip_dbg_sysfs(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_sysfs, fmt , ##args)
-#define usbip_dbg_xmit(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_xmit, fmt , ##args)
-#define usbip_dbg_urb(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_urb, fmt , ##args)
-#define usbip_dbg_eh(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_eh, fmt , ##args)
-
-#define usbip_dbg_vhci_rh(fmt, args...)	\
-	usbip_dbg_with_flag(usbip_debug_vhci_rh, fmt , ##args)
-#define usbip_dbg_vhci_hc(fmt, args...)	\
-	usbip_dbg_with_flag(usbip_debug_vhci_hc, fmt , ##args)
-#define usbip_dbg_vhci_rx(fmt, args...)	\
-	usbip_dbg_with_flag(usbip_debug_vhci_rx, fmt , ##args)
-#define usbip_dbg_vhci_tx(fmt, args...)	\
-	usbip_dbg_with_flag(usbip_debug_vhci_tx, fmt , ##args)
-#define usbip_dbg_vhci_sysfs(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_vhci_sysfs, fmt , ##args)
-
-#define usbip_dbg_stub_cmp(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_stub_cmp, fmt , ##args)
-#define usbip_dbg_stub_rx(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_stub_rx, fmt , ##args)
-#define usbip_dbg_stub_tx(fmt, args...) \
-	usbip_dbg_with_flag(usbip_debug_stub_tx, fmt , ##args)
-
-/*
- * USB/IP request headers
- *
- * Each request is transferred across the network to its counterpart, which
- * facilitates the normal USB communication. The values contained in the headers
- * are basically the same as in a URB. Currently, four request types are
- * defined:
- *
- *  - USBIP_CMD_SUBMIT: a USB request block, corresponds to usb_submit_urb()
- *    (client to server)
- *
- *  - USBIP_RET_SUBMIT: the result of USBIP_CMD_SUBMIT
- *    (server to client)
- *
- *  - USBIP_CMD_UNLINK: an unlink request of a pending USBIP_CMD_SUBMIT,
- *    corresponds to usb_unlink_urb()
- *    (client to server)
- *
- *  - USBIP_RET_UNLINK: the result of USBIP_CMD_UNLINK
- *    (server to client)
- *
- */
-#define USBIP_CMD_SUBMIT	0x0001
-#define USBIP_CMD_UNLINK	0x0002
-#define USBIP_RET_SUBMIT	0x0003
-#define USBIP_RET_UNLINK	0x0004
-
-#define USBIP_DIR_OUT	0x00
-#define USBIP_DIR_IN	0x01
-
-/**
- * struct usbip_header_basic - data pertinent to every request
- * @command: the usbip request type
- * @seqnum: sequential number that identifies requests; incremented per
- *	    connection
- * @devid: specifies a remote USB device uniquely instead of busnum and devnum;
- *	   in the stub driver, this value is ((busnum << 16) | devnum)
- * @direction: direction of the transfer
- * @ep: endpoint number
- */
-struct usbip_header_basic {
-	__u32 command;
-	__u32 seqnum;
-	__u32 devid;
-	__u32 direction;
-	__u32 ep;
-} __packed;
-
-/**
- * struct usbip_header_cmd_submit - USBIP_CMD_SUBMIT packet header
- * @transfer_flags: URB flags
- * @transfer_buffer_length: the data size for (in) or (out) transfer
- * @start_frame: initial frame for isochronous or interrupt transfers
- * @number_of_packets: number of isochronous packets
- * @interval: maximum time for the request on the server-side host controller
- * @setup: setup data for a control request
- */
-struct usbip_header_cmd_submit {
-	__u32 transfer_flags;
-	__s32 transfer_buffer_length;
-
-	/* it is difficult for usbip to sync frames (reserved only?) */
-	__s32 start_frame;
-	__s32 number_of_packets;
-	__s32 interval;
-
-	unsigned char setup[8];
-} __packed;
-
-/**
- * struct usbip_header_ret_submit - USBIP_RET_SUBMIT packet header
- * @status: return status of a non-iso request
- * @actual_length: number of bytes transferred
- * @start_frame: initial frame for isochronous or interrupt transfers
- * @number_of_packets: number of isochronous packets
- * @error_count: number of errors for isochronous transfers
- */
-struct usbip_header_ret_submit {
-	__s32 status;
-	__s32 actual_length;
-	__s32 start_frame;
-	__s32 number_of_packets;
-	__s32 error_count;
-} __packed;
-
-/**
- * struct usbip_header_cmd_unlink - USBIP_CMD_UNLINK packet header
- * @seqnum: the URB seqnum to unlink
- */
-struct usbip_header_cmd_unlink {
-	__u32 seqnum;
-} __packed;
-
-/**
- * struct usbip_header_ret_unlink - USBIP_RET_UNLINK packet header
- * @status: return status of the request
- */
-struct usbip_header_ret_unlink {
-	__s32 status;
-} __packed;
-
-/**
- * struct usbip_header - common header for all usbip packets
- * @base: the basic header
- * @u: packet type dependent header
- */
-struct usbip_header {
-	struct usbip_header_basic base;
-
-	union {
-		struct usbip_header_cmd_submit	cmd_submit;
-		struct usbip_header_ret_submit	ret_submit;
-		struct usbip_header_cmd_unlink	cmd_unlink;
-		struct usbip_header_ret_unlink	ret_unlink;
-	} u;
-} __packed;
-
-/*
- * This is the same as usb_iso_packet_descriptor but packed for pdu.
- */
-struct usbip_iso_packet_descriptor {
-	__u32 offset;
-	__u32 length;			/* expected length */
-	__u32 actual_length;
-	__u32 status;
-} __packed;
-
-enum usbip_side {
-	USBIP_VHCI,
-	USBIP_STUB,
-};
-
-/* event handler */
-#define USBIP_EH_SHUTDOWN	(1 << 0)
-#define USBIP_EH_BYE		(1 << 1)
-#define USBIP_EH_RESET		(1 << 2)
-#define USBIP_EH_UNUSABLE	(1 << 3)
-
-#define SDEV_EVENT_REMOVED   (USBIP_EH_SHUTDOWN | USBIP_EH_RESET | USBIP_EH_BYE)
-#define	SDEV_EVENT_DOWN		(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
-#define	SDEV_EVENT_ERROR_TCP	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
-#define	SDEV_EVENT_ERROR_SUBMIT	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
-#define	SDEV_EVENT_ERROR_MALLOC	(USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
-
-#define	VDEV_EVENT_REMOVED	(USBIP_EH_SHUTDOWN | USBIP_EH_BYE)
-#define	VDEV_EVENT_DOWN		(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
-#define	VDEV_EVENT_ERROR_TCP	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
-#define	VDEV_EVENT_ERROR_MALLOC	(USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
-
-/* a common structure for stub_device and vhci_device */
-struct usbip_device {
-	enum usbip_side side;
-	enum usbip_device_status status;
-
-	/* lock for status */
-	spinlock_t lock;
-
-	struct socket *tcp_socket;
-
-	struct task_struct *tcp_rx;
-	struct task_struct *tcp_tx;
-
-	unsigned long event;
-	struct task_struct *eh;
-	wait_queue_head_t eh_waitq;
-
-	struct eh_ops {
-		void (*shutdown)(struct usbip_device *);
-		void (*reset)(struct usbip_device *);
-		void (*unusable)(struct usbip_device *);
-	} eh_ops;
-};
-
-#define kthread_get_run(threadfn, data, namefmt, ...)			   \
-({									   \
-	struct task_struct *__k						   \
-		= kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
-	if (!IS_ERR(__k)) {						   \
-		get_task_struct(__k);					   \
-		wake_up_process(__k);					   \
-	}								   \
-	__k;								   \
-})
-
-#define kthread_stop_put(k)		\
-	do {				\
-		kthread_stop(k);	\
-		put_task_struct(k);	\
-	} while (0)
-
-/* usbip_common.c */
-void usbip_dump_urb(struct urb *purb);
-void usbip_dump_header(struct usbip_header *pdu);
-
-int usbip_recv(struct socket *sock, void *buf, int size);
-
-void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
-		    int pack);
-void usbip_header_correct_endian(struct usbip_header *pdu, int send);
-
-struct usbip_iso_packet_descriptor*
-usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen);
-
-/* some members of urb must be substituted before. */
-int usbip_recv_iso(struct usbip_device *ud, struct urb *urb);
-void usbip_pad_iso(struct usbip_device *ud, struct urb *urb);
-int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb);
-
-/* usbip_event.c */
-int usbip_start_eh(struct usbip_device *ud);
-void usbip_stop_eh(struct usbip_device *ud);
-void usbip_event_add(struct usbip_device *ud, unsigned long event);
-int usbip_event_happened(struct usbip_device *ud);
-
-static inline int interface_to_busnum(struct usb_interface *interface)
-{
-	struct usb_device *udev = interface_to_usbdev(interface);
-
-	return udev->bus->busnum;
-}
-
-static inline int interface_to_devnum(struct usb_interface *interface)
-{
-	struct usb_device *udev = interface_to_usbdev(interface);
-
-	return udev->devnum;
-}
-
-#endif /* __USBIP_COMMON_H */
diff --git a/drivers/staging/usbip/usbip_event.c b/drivers/staging/usbip/usbip_event.c
deleted file mode 100644
index 64933b993d7a..000000000000
--- a/drivers/staging/usbip/usbip_event.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/export.h>
-
-#include "usbip_common.h"
-
-static int event_handler(struct usbip_device *ud)
-{
-	usbip_dbg_eh("enter\n");
-
-	/*
-	 * Events are handled by only this thread.
-	 */
-	while (usbip_event_happened(ud)) {
-		usbip_dbg_eh("pending event %lx\n", ud->event);
-
-		/*
-		 * NOTE: shutdown must come first.
-		 * Shutdown the device.
-		 */
-		if (ud->event & USBIP_EH_SHUTDOWN) {
-			ud->eh_ops.shutdown(ud);
-			ud->event &= ~USBIP_EH_SHUTDOWN;
-		}
-
-		/* Reset the device. */
-		if (ud->event & USBIP_EH_RESET) {
-			ud->eh_ops.reset(ud);
-			ud->event &= ~USBIP_EH_RESET;
-		}
-
-		/* Mark the device as unusable. */
-		if (ud->event & USBIP_EH_UNUSABLE) {
-			ud->eh_ops.unusable(ud);
-			ud->event &= ~USBIP_EH_UNUSABLE;
-		}
-
-		/* Stop the error handler. */
-		if (ud->event & USBIP_EH_BYE)
-			return -1;
-	}
-
-	return 0;
-}
-
-static int event_handler_loop(void *data)
-{
-	struct usbip_device *ud = data;
-
-	while (!kthread_should_stop()) {
-		wait_event_interruptible(ud->eh_waitq,
-					 usbip_event_happened(ud) ||
-					 kthread_should_stop());
-		usbip_dbg_eh("wakeup\n");
-
-		if (event_handler(ud) < 0)
-			break;
-	}
-
-	return 0;
-}
-
-int usbip_start_eh(struct usbip_device *ud)
-{
-	init_waitqueue_head(&ud->eh_waitq);
-	ud->event = 0;
-
-	ud->eh = kthread_run(event_handler_loop, ud, "usbip_eh");
-	if (IS_ERR(ud->eh)) {
-		pr_warn("Unable to start control thread\n");
-		return PTR_ERR(ud->eh);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(usbip_start_eh);
-
-void usbip_stop_eh(struct usbip_device *ud)
-{
-	if (ud->eh == current)
-		return; /* do not wait for myself */
-
-	kthread_stop(ud->eh);
-	usbip_dbg_eh("usbip_eh has finished\n");
-}
-EXPORT_SYMBOL_GPL(usbip_stop_eh);
-
-void usbip_event_add(struct usbip_device *ud, unsigned long event)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ud->lock, flags);
-	ud->event |= event;
-	wake_up(&ud->eh_waitq);
-	spin_unlock_irqrestore(&ud->lock, flags);
-}
-EXPORT_SYMBOL_GPL(usbip_event_add);
-
-int usbip_event_happened(struct usbip_device *ud)
-{
-	int happened = 0;
-
-	spin_lock(&ud->lock);
-	if (ud->event != 0)
-		happened = 1;
-	spin_unlock(&ud->lock);
-
-	return happened;
-}
-EXPORT_SYMBOL_GPL(usbip_event_happened);
diff --git a/drivers/staging/usbip/usbip_protocol.txt b/drivers/staging/usbip/usbip_protocol.txt
deleted file mode 100644
index 16b6fe27284c..000000000000
--- a/drivers/staging/usbip/usbip_protocol.txt
+++ /dev/null
@@ -1,358 +0,0 @@
-PRELIMINARY DRAFT, MAY CONTAIN MISTAKES!
-28 Jun 2011
-
-The USB/IP protocol follows a server/client architecture. The server exports the
-USB devices and the clients imports them. The device driver for the exported
-USB device runs on the client machine.
-
-The client may ask for the list of the exported USB devices. To get the list the
-client opens a TCP/IP connection towards the server, and sends an OP_REQ_DEVLIST
-packet on top of the TCP/IP connection (so the actual OP_REQ_DEVLIST may be sent
-in one or more pieces at the low level transport layer). The server sends back
-the OP_REP_DEVLIST packet which lists the exported USB devices. Finally the
-TCP/IP connection is closed.
-
- virtual host controller                                 usb host
-      "client"                                           "server"
-  (imports USB devices)                             (exports USB devices)
-          |                                                 |
-          |                  OP_REQ_DEVLIST                 |
-          | ----------------------------------------------> |
-          |                                                 |
-          |                  OP_REP_DEVLIST                 |
-          | <---------------------------------------------- |
-          |                                                 |
-
-Once the client knows the list of exported USB devices it may decide to use one
-of them. First the client opens a TCP/IP connection towards the server and
-sends an OP_REQ_IMPORT packet. The server replies with OP_REP_IMPORT. If the
-import was successful the TCP/IP connection remains open and will be used
-to transfer the URB traffic between the client and the server. The client may
-send two types of packets: the USBIP_CMD_SUBMIT to submit an URB, and
-USBIP_CMD_UNLINK to unlink a previously submitted URB. The answers of the
-server may be USBIP_RET_SUBMIT and USBIP_RET_UNLINK respectively.
-
- virtual host controller                                 usb host
-      "client"                                           "server"
-  (imports USB devices)                             (exports USB devices)
-          |                                                 |
-          |                  OP_REQ_IMPORT                  |
-          | ----------------------------------------------> |
-          |                                                 |
-          |                  OP_REP_IMPORT                  |
-          | <---------------------------------------------- |
-          |                                                 |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = n)         |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = n)         |
-          | <---------------------------------------------- |
-          |                        .                        |
-          |                        :                        |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m)         |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+1)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+2)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m)         |
-          | <---------------------------------------------- |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+3)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m+1)       |
-          | <---------------------------------------------- |
-          |                                                 |
-          |            USBIP_CMD_SUBMIT(seqnum = m+4)       |
-          | ----------------------------------------------> |
-          |                                                 |
-          |            USBIP_RET_SUBMIT(seqnum = m+2)       |
-          | <---------------------------------------------- |
-          |                        .                        |
-          |                        :                        |
-          |                                                 |
-          |               USBIP_CMD_UNLINK                  |
-          | ----------------------------------------------> |
-          |                                                 |
-          |               USBIP_RET_UNLINK                  |
-          | <---------------------------------------------- |
-          |                                                 |
-
-The fields are in network (big endian) byte order meaning that the most significant
-byte (MSB) is stored at the lowest address.
-
-
-OP_REQ_DEVLIST: Retrieve the list of exported USB devices.
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
------------+--------+------------+---------------------------------------------------
- 2         | 2      | 0x8005     | Command code: Retrieve the list of exported USB
-           |        |            |   devices.
------------+--------+------------+---------------------------------------------------
- 4         | 4      | 0x00000000 | Status: unused, shall be set to 0
-
-OP_REP_DEVLIST: Reply with the list of exported USB devices.
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0.
------------+--------+------------+---------------------------------------------------
- 2         | 2      | 0x0005     | Reply code: The list of exported USB devices.
------------+--------+------------+---------------------------------------------------
- 4         | 4      | 0x00000000 | Status: 0 for OK
------------+--------+------------+---------------------------------------------------
- 8         | 4      | n          | Number of exported devices: 0 means no exported
-           |        |            |   devices.
------------+--------+------------+---------------------------------------------------
- 0x0C      |        |            | From now on the exported n devices are described,
-           |        |            |   if any. If no devices are exported the message
-           |        |            |   ends with the previous "number of exported
-           |        |            |   devices" field.
------------+--------+------------+---------------------------------------------------
-           | 256    |            | path: Path of the device on the host exporting the
-           |        |            |   USB device, string closed with zero byte, e.g.
-           |        |            |   "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"
-           |        |            |   The unused bytes shall be filled with zero
-           |        |            |   bytes.
------------+--------+------------+---------------------------------------------------
- 0x10C     | 32     |            | busid: Bus ID of the exported device, string
-           |        |            |   closed with zero byte, e.g. "3-2". The unused
-           |        |            |   bytes shall be filled with zero bytes.
------------+--------+------------+---------------------------------------------------
- 0x12C     | 4      |            | busnum
------------+--------+------------+---------------------------------------------------
- 0x130     | 4      |            | devnum
------------+--------+------------+---------------------------------------------------
- 0x134     | 4      |            | speed
------------+--------+------------+---------------------------------------------------
- 0x138     | 2      |            | idVendor
------------+--------+------------+---------------------------------------------------
- 0x13A     | 2      |            | idProduct
------------+--------+------------+---------------------------------------------------
- 0x13C     | 2      |            | bcdDevice
------------+--------+------------+---------------------------------------------------
- 0x13E     | 1      |            | bDeviceClass
------------+--------+------------+---------------------------------------------------
- 0x13F     | 1      |            | bDeviceSubClass
------------+--------+------------+---------------------------------------------------
- 0x140     | 1      |            | bDeviceProtocol
------------+--------+------------+---------------------------------------------------
- 0x141     | 1      |            | bConfigurationValue
------------+--------+------------+---------------------------------------------------
- 0x142     | 1      |            | bNumConfigurations
------------+--------+------------+---------------------------------------------------
- 0x143     | 1      |            | bNumInterfaces
------------+--------+------------+---------------------------------------------------
- 0x144     |        | m_0        | From now on each interface is described, all
-           |        |            |   together bNumInterfaces times, with the
-           |        |            |   the following 4 fields:
------------+--------+------------+---------------------------------------------------
-           | 1      |            | bInterfaceClass
------------+--------+------------+---------------------------------------------------
- 0x145     | 1      |            | bInterfaceSubClass
------------+--------+------------+---------------------------------------------------
- 0x146     | 1      |            | bInterfaceProtocol
------------+--------+------------+---------------------------------------------------
- 0x147     | 1      |            | padding byte for alignment, shall be set to zero
------------+--------+------------+---------------------------------------------------
- 0xC +     |        |            | The second exported USB device starts at i=1
- i*0x138 + |        |            | with the busid field.
- m_(i-1)*4 |        |            |
-
-OP_REQ_IMPORT: Request to import (attach) a remote USB device.
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
------------+--------+------------+---------------------------------------------------
- 2         | 2      | 0x8003     | Command code: import a remote USB device.
------------+--------+------------+---------------------------------------------------
- 4         | 4      | 0x00000000 | Status: unused, shall be set to 0
------------+--------+------------+---------------------------------------------------
- 8         | 32     |            | busid: the busid of the exported device on the
-           |        |            |   remote host. The possible values are taken
-           |        |            |   from the message field OP_REP_DEVLIST.busid.
-           |        |            |   A string closed with zero, the unused bytes
-           |        |            |   shall be filled with zeros.
------------+--------+------------+---------------------------------------------------
-
-OP_REP_IMPORT: Reply to import (attach) a remote USB device.
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
------------+--------+------------+---------------------------------------------------
- 2         | 2      | 0x0003     | Reply code: Reply to import.
------------+--------+------------+---------------------------------------------------
- 4         | 4      | 0x00000000 | Status: 0 for OK
-           |        |            |         1 for error
------------+--------+------------+---------------------------------------------------
- 8         |        |            | From now on comes the details of the imported
-           |        |            |   device, if the previous status field was OK (0),
-           |        |            |   otherwise the reply ends with the status field.
------------+--------+------------+---------------------------------------------------
-           | 256    |            | path: Path of the device on the host exporting the
-           |        |            |   USB device, string closed with zero byte, e.g.
-           |        |            |   "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"
-           |        |            |   The unused bytes shall be filled with zero
-           |        |            |   bytes.
------------+--------+------------+---------------------------------------------------
- 0x108     | 32     |            | busid: Bus ID of the exported device, string
-           |        |            |   closed with zero byte, e.g. "3-2". The unused
-           |        |            |   bytes shall be filled with zero bytes.
------------+--------+------------+---------------------------------------------------
- 0x128     | 4      |            | busnum
------------+--------+------------+---------------------------------------------------
- 0x12C     | 4      |            | devnum
------------+--------+------------+---------------------------------------------------
- 0x130     | 4      |            | speed
------------+--------+------------+---------------------------------------------------
- 0x134     | 2      |            | idVendor
------------+--------+------------+---------------------------------------------------
- 0x136     | 2      |            | idProduct
------------+--------+------------+---------------------------------------------------
- 0x138     | 2      |            | bcdDevice
------------+--------+------------+---------------------------------------------------
- 0x139     | 1      |            | bDeviceClass
------------+--------+------------+---------------------------------------------------
- 0x13A     | 1      |            | bDeviceSubClass
------------+--------+------------+---------------------------------------------------
- 0x13B     | 1      |            | bDeviceProtocol
------------+--------+------------+---------------------------------------------------
- 0x13C     | 1      |            | bConfigurationValue
------------+--------+------------+---------------------------------------------------
- 0x13D     | 1      |            | bNumConfigurations
------------+--------+------------+---------------------------------------------------
- 0x13E     | 1      |            | bNumInterfaces
-
-USBIP_CMD_SUBMIT: Submit an URB
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 4      | 0x00000001 | command: Submit an URB
------------+--------+------------+---------------------------------------------------
- 4         | 4      |            | seqnum: the sequence number of the URB to submit
------------+--------+------------+---------------------------------------------------
- 8         | 4      |            | devid
------------+--------+------------+---------------------------------------------------
- 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
-           |        |            |            1: USBIP_DIR_IN
------------+--------+------------+---------------------------------------------------
- 0x10      | 4      |            | ep: endpoint number, possible values are: 0...15
------------+--------+------------+---------------------------------------------------
- 0x14      | 4      |            | transfer_flags: possible values depend on the
-           |        |            |   URB transfer type, see below
------------+--------+------------+---------------------------------------------------
- 0x18      | 4      |            | transfer_buffer_length
------------+--------+------------+---------------------------------------------------
- 0x1C      | 4      |            | start_frame: specify the selected frame to
-           |        |            |   transmit an ISO frame, ignored if URB_ISO_ASAP
-           |        |            |   is specified at transfer_flags
------------+--------+------------+---------------------------------------------------
- 0x20      | 4      |            | number_of_packets: number of ISO packets
------------+--------+------------+---------------------------------------------------
- 0x24      | 4      |            | interval: maximum time for the request on the
-           |        |            |   server-side host controller
------------+--------+------------+---------------------------------------------------
- 0x28      | 8      |            | setup: data bytes for USB setup, filled with
-           |        |            |   zeros if not used
------------+--------+------------+---------------------------------------------------
- 0x30      |        |            | URB data. For ISO transfers the padding between
-           |        |            |   each ISO packets is not transmitted.
-
-
-  Allowed transfer_flags  | value      | control | interrupt | bulk     | isochronous
- -------------------------+------------+---------+-----------+----------+-------------
-  URB_SHORT_NOT_OK        | 0x00000001 | only in | only in   | only in  | no
-  URB_ISO_ASAP            | 0x00000002 | no      | no        | no       | yes
-  URB_NO_TRANSFER_DMA_MAP | 0x00000004 | yes     | yes       | yes      | yes
-  URB_NO_FSBR             | 0x00000020 | yes     | no        | no       | no
-  URB_ZERO_PACKET         | 0x00000040 | no      | no        | only out | no
-  URB_NO_INTERRUPT        | 0x00000080 | yes     | yes       | yes      | yes
-  URB_FREE_BUFFER         | 0x00000100 | yes     | yes       | yes      | yes
-  URB_DIR_MASK            | 0x00000200 | yes     | yes       | yes      | yes
-
-
-USBIP_RET_SUBMIT: Reply for submitting an URB
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 4      | 0x00000003 | command
------------+--------+------------+---------------------------------------------------
- 4         | 4      |            | seqnum: URB sequence number
------------+--------+------------+---------------------------------------------------
- 8         | 4      |            | devid
------------+--------+------------+---------------------------------------------------
- 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
-           |        |            |            1: USBIP_DIR_IN
------------+--------+------------+---------------------------------------------------
- 0x10      | 4      |            | ep: endpoint number
------------+--------+------------+---------------------------------------------------
- 0x14      | 4      |            | status: zero for successful URB transaction,
-           |        |            |   otherwise some kind of error happened.
------------+--------+------------+---------------------------------------------------
- 0x18      | 4      | n          | actual_length: number of URB data bytes
------------+--------+------------+---------------------------------------------------
- 0x1C      | 4      |            | start_frame: for an ISO frame the actually
-           |        |            |   selected frame for transmit.
------------+--------+------------+---------------------------------------------------
- 0x20      | 4      |            | number_of_packets
------------+--------+------------+---------------------------------------------------
- 0x24      | 4      |            | error_count
------------+--------+------------+---------------------------------------------------
- 0x28      | 8      |            | setup: data bytes for USB setup, filled with
-           |        |            |   zeros if not used
------------+--------+------------+---------------------------------------------------
- 0x30      | n      |            | URB data bytes. For ISO transfers the padding
-           |        |            |   between each ISO packets is not transmitted.
-
-USBIP_CMD_UNLINK: Unlink an URB
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 4      | 0x00000002 | command: URB unlink command
------------+--------+------------+---------------------------------------------------
- 4         | 4      |            | seqnum: URB sequence number to unlink: FIXME: is this so?
------------+--------+------------+---------------------------------------------------
- 8         | 4      |            | devid
------------+--------+------------+---------------------------------------------------
- 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
-           |        |            |            1: USBIP_DIR_IN
------------+--------+------------+---------------------------------------------------
- 0x10      | 4      |            | ep: endpoint number: zero
------------+--------+------------+---------------------------------------------------
- 0x14      | 4      |            | seqnum: the URB sequence number given previously
-           |        |            |   at USBIP_CMD_SUBMIT.seqnum field
------------+--------+------------+---------------------------------------------------
- 0x30      | n      |            | URB data bytes. For ISO transfers the padding
-           |        |            |   between each ISO packets is not transmitted.
-
-USBIP_RET_UNLINK: Reply for URB unlink
-
- Offset    | Length | Value      | Description
------------+--------+------------+---------------------------------------------------
- 0         | 4      | 0x00000004 | command: reply for the URB unlink command
------------+--------+------------+---------------------------------------------------
- 4         | 4      |            | seqnum: the unlinked URB sequence number
------------+--------+------------+---------------------------------------------------
- 8         | 4      |            | devid
------------+--------+------------+---------------------------------------------------
- 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
-           |        |            |            1: USBIP_DIR_IN
------------+--------+------------+---------------------------------------------------
- 0x10      | 4      |            | ep: endpoint number
------------+--------+------------+---------------------------------------------------
- 0x14      | 4      |            | status: This is the value contained in the
-           |        |            |   urb->status in the URB completition handler.
-           |        |            |   FIXME: a better explanation needed.
------------+--------+------------+---------------------------------------------------
- 0x30      | n      |            | URB data bytes. For ISO transfers the padding
-           |        |            |   between each ISO packets is not transmitted.
diff --git a/drivers/staging/usbip/vhci.h b/drivers/staging/usbip/vhci.h
deleted file mode 100644
index a863a98a91ce..000000000000
--- a/drivers/staging/usbip/vhci.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#ifndef __USBIP_VHCI_H
-#define __USBIP_VHCI_H
-
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-#include <linux/usb.h>
-#include <linux/usb/hcd.h>
-#include <linux/wait.h>
-
-struct vhci_device {
-	struct usb_device *udev;
-
-	/*
-	 * devid specifies a remote usb device uniquely instead
-	 * of combination of busnum and devnum.
-	 */
-	__u32 devid;
-
-	/* speed of a remote device */
-	enum usb_device_speed speed;
-
-	/* vhci root-hub port to which this device is attached */
-	__u32 rhport;
-
-	struct usbip_device ud;
-
-	/* lock for the below link lists */
-	spinlock_t priv_lock;
-
-	/* vhci_priv is linked to one of them. */
-	struct list_head priv_tx;
-	struct list_head priv_rx;
-
-	/* vhci_unlink is linked to one of them */
-	struct list_head unlink_tx;
-	struct list_head unlink_rx;
-
-	/* vhci_tx thread sleeps for this queue */
-	wait_queue_head_t waitq_tx;
-};
-
-/* urb->hcpriv, use container_of() */
-struct vhci_priv {
-	unsigned long seqnum;
-	struct list_head list;
-
-	struct vhci_device *vdev;
-	struct urb *urb;
-};
-
-struct vhci_unlink {
-	/* seqnum of this request */
-	unsigned long seqnum;
-
-	struct list_head list;
-
-	/* seqnum of the unlink target */
-	unsigned long unlink_seqnum;
-};
-
-/* Number of supported ports. Value has an upperbound of USB_MAXCHILDREN */
-#define VHCI_NPORTS 8
-
-/* for usb_bus.hcpriv */
-struct vhci_hcd {
-	spinlock_t lock;
-
-	u32 port_status[VHCI_NPORTS];
-
-	unsigned resuming:1;
-	unsigned long re_timeout;
-
-	atomic_t seqnum;
-
-	/*
-	 * NOTE:
-	 * wIndex shows the port number and begins from 1.
-	 * But, the index of this array begins from 0.
-	 */
-	struct vhci_device vdev[VHCI_NPORTS];
-};
-
-extern struct vhci_hcd *the_controller;
-extern const struct attribute_group dev_attr_group;
-
-/* vhci_hcd.c */
-void rh_port_connect(int rhport, enum usb_device_speed speed);
-
-/* vhci_rx.c */
-struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum);
-int vhci_rx_loop(void *data);
-
-/* vhci_tx.c */
-int vhci_tx_loop(void *data);
-
-static inline struct vhci_device *port_to_vdev(__u32 port)
-{
-	return &the_controller->vdev[port];
-}
-
-static inline struct vhci_hcd *hcd_to_vhci(struct usb_hcd *hcd)
-{
-	return (struct vhci_hcd *) (hcd->hcd_priv);
-}
-
-static inline struct usb_hcd *vhci_to_hcd(struct vhci_hcd *vhci)
-{
-	return container_of((void *) vhci, struct usb_hcd, hcd_priv);
-}
-
-static inline struct device *vhci_dev(struct vhci_hcd *vhci)
-{
-	return vhci_to_hcd(vhci)->self.controller;
-}
-
-#endif /* __USBIP_VHCI_H */
diff --git a/drivers/staging/usbip/vhci_hcd.c b/drivers/staging/usbip/vhci_hcd.c
deleted file mode 100644
index c02374b6049c..000000000000
--- a/drivers/staging/usbip/vhci_hcd.c
+++ /dev/null
@@ -1,1171 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/kernel.h>
-#include <linux/kthread.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include "usbip_common.h"
-#include "vhci.h"
-
-#define DRIVER_AUTHOR "Takahiro Hirofuchi"
-#define DRIVER_DESC "USB/IP 'Virtual' Host Controller (VHCI) Driver"
-
-/*
- * TODO
- *	- update root hub emulation
- *	- move the emulation code to userland ?
- *		porting to other operating systems
- *		minimize kernel code
- *	- add suspend/resume code
- *	- clean up everything
- */
-
-/* See usb gadget dummy hcd */
-
-static int vhci_hub_status(struct usb_hcd *hcd, char *buff);
-static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
-			    u16 wIndex, char *buff, u16 wLength);
-static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
-			    gfp_t mem_flags);
-static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status);
-static int vhci_start(struct usb_hcd *vhci_hcd);
-static void vhci_stop(struct usb_hcd *hcd);
-static int vhci_get_frame_number(struct usb_hcd *hcd);
-
-static const char driver_name[] = "vhci_hcd";
-static const char driver_desc[] = "USB/IP Virtual Host Controller";
-
-struct vhci_hcd *the_controller;
-
-static const char * const bit_desc[] = {
-	"CONNECTION",		/*0*/
-	"ENABLE",		/*1*/
-	"SUSPEND",		/*2*/
-	"OVER_CURRENT",		/*3*/
-	"RESET",		/*4*/
-	"R5",			/*5*/
-	"R6",			/*6*/
-	"R7",			/*7*/
-	"POWER",		/*8*/
-	"LOWSPEED",		/*9*/
-	"HIGHSPEED",		/*10*/
-	"PORT_TEST",		/*11*/
-	"INDICATOR",		/*12*/
-	"R13",			/*13*/
-	"R14",			/*14*/
-	"R15",			/*15*/
-	"C_CONNECTION",		/*16*/
-	"C_ENABLE",		/*17*/
-	"C_SUSPEND",		/*18*/
-	"C_OVER_CURRENT",	/*19*/
-	"C_RESET",		/*20*/
-	"R21",			/*21*/
-	"R22",			/*22*/
-	"R23",			/*23*/
-	"R24",			/*24*/
-	"R25",			/*25*/
-	"R26",			/*26*/
-	"R27",			/*27*/
-	"R28",			/*28*/
-	"R29",			/*29*/
-	"R30",			/*30*/
-	"R31",			/*31*/
-};
-
-static void dump_port_status_diff(u32 prev_status, u32 new_status)
-{
-	int i = 0;
-	u32 bit = 1;
-
-	pr_debug("status prev -> new: %08x -> %08x\n", prev_status, new_status);
-	while (bit) {
-		u32 prev = prev_status & bit;
-		u32 new = new_status & bit;
-		char change;
-
-		if (!prev && new)
-			change = '+';
-		else if (prev && !new)
-			change = '-';
-		else
-			change = ' ';
-
-		if (prev || new)
-			pr_debug(" %c%s\n", change, bit_desc[i]);
-		bit <<= 1;
-		i++;
-	}
-	pr_debug("\n");
-}
-
-void rh_port_connect(int rhport, enum usb_device_speed speed)
-{
-	usbip_dbg_vhci_rh("rh_port_connect %d\n", rhport);
-
-	spin_lock(&the_controller->lock);
-
-	the_controller->port_status[rhport] |= USB_PORT_STAT_CONNECTION
-		| (1 << USB_PORT_FEAT_C_CONNECTION);
-
-	switch (speed) {
-	case USB_SPEED_HIGH:
-		the_controller->port_status[rhport] |= USB_PORT_STAT_HIGH_SPEED;
-		break;
-	case USB_SPEED_LOW:
-		the_controller->port_status[rhport] |= USB_PORT_STAT_LOW_SPEED;
-		break;
-	default:
-		break;
-	}
-
-	spin_unlock(&the_controller->lock);
-
-	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
-}
-
-static void rh_port_disconnect(int rhport)
-{
-	usbip_dbg_vhci_rh("rh_port_disconnect %d\n", rhport);
-
-	spin_lock(&the_controller->lock);
-
-	the_controller->port_status[rhport] &= ~USB_PORT_STAT_CONNECTION;
-	the_controller->port_status[rhport] |=
-					(1 << USB_PORT_FEAT_C_CONNECTION);
-
-	spin_unlock(&the_controller->lock);
-	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
-}
-
-#define PORT_C_MASK				\
-	((USB_PORT_STAT_C_CONNECTION		\
-	  | USB_PORT_STAT_C_ENABLE		\
-	  | USB_PORT_STAT_C_SUSPEND		\
-	  | USB_PORT_STAT_C_OVERCURRENT		\
-	  | USB_PORT_STAT_C_RESET) << 16)
-
-/*
- * Returns 0 if the status hasn't changed, or the number of bytes in buf.
- * Ports are 0-indexed from the HCD point of view,
- * and 1-indexed from the USB core pointer of view.
- *
- * @buf: a bitmap to show which port status has been changed.
- *  bit  0: reserved
- *  bit  1: the status of port 0 has been changed.
- *  bit  2: the status of port 1 has been changed.
- *  ...
- */
-static int vhci_hub_status(struct usb_hcd *hcd, char *buf)
-{
-	struct vhci_hcd	*vhci;
-	int		retval;
-	int		rhport;
-	int		changed = 0;
-
-	retval = DIV_ROUND_UP(VHCI_NPORTS + 1, 8);
-	memset(buf, 0, retval);
-
-	vhci = hcd_to_vhci(hcd);
-
-	spin_lock(&vhci->lock);
-	if (!HCD_HW_ACCESSIBLE(hcd)) {
-		usbip_dbg_vhci_rh("hw accessible flag not on?\n");
-		goto done;
-	}
-
-	/* check pseudo status register for each port */
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
-		if ((vhci->port_status[rhport] & PORT_C_MASK)) {
-			/* The status of a port has been changed, */
-			usbip_dbg_vhci_rh("port %d status changed\n", rhport);
-
-			buf[(rhport + 1) / 8] |= 1 << (rhport + 1) % 8;
-			changed = 1;
-		}
-	}
-
-	if ((hcd->state == HC_STATE_SUSPENDED) && (changed == 1))
-		usb_hcd_resume_root_hub(hcd);
-
-done:
-	spin_unlock(&vhci->lock);
-	return changed ? retval : 0;
-}
-
-static inline void hub_descriptor(struct usb_hub_descriptor *desc)
-{
-	memset(desc, 0, sizeof(*desc));
-	desc->bDescriptorType = 0x29;
-	desc->bDescLength = 9;
-	desc->wHubCharacteristics = (__constant_cpu_to_le16(0x0001));
-	desc->bNbrPorts = VHCI_NPORTS;
-	desc->u.hs.DeviceRemovable[0] = 0xff;
-	desc->u.hs.DeviceRemovable[1] = 0xff;
-}
-
-static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
-			    u16 wIndex, char *buf, u16 wLength)
-{
-	struct vhci_hcd	*dum;
-	int             retval = 0;
-	int		rhport;
-
-	u32 prev_port_status[VHCI_NPORTS];
-
-	if (!HCD_HW_ACCESSIBLE(hcd))
-		return -ETIMEDOUT;
-
-	/*
-	 * NOTE:
-	 * wIndex shows the port number and begins from 1.
-	 */
-	usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue,
-			  wIndex);
-	if (wIndex > VHCI_NPORTS)
-		pr_err("invalid port number %d\n", wIndex);
-	rhport = ((__u8)(wIndex & 0x00ff)) - 1;
-
-	dum = hcd_to_vhci(hcd);
-
-	spin_lock(&dum->lock);
-
-	/* store old status and compare now and old later */
-	if (usbip_dbg_flag_vhci_rh) {
-		memcpy(prev_port_status, dum->port_status,
-			sizeof(prev_port_status));
-	}
-
-	switch (typeReq) {
-	case ClearHubFeature:
-		usbip_dbg_vhci_rh(" ClearHubFeature\n");
-		break;
-	case ClearPortFeature:
-		switch (wValue) {
-		case USB_PORT_FEAT_SUSPEND:
-			if (dum->port_status[rhport] & USB_PORT_STAT_SUSPEND) {
-				/* 20msec signaling */
-				dum->resuming = 1;
-				dum->re_timeout =
-					jiffies + msecs_to_jiffies(20);
-			}
-			break;
-		case USB_PORT_FEAT_POWER:
-			usbip_dbg_vhci_rh(
-				" ClearPortFeature: USB_PORT_FEAT_POWER\n");
-			dum->port_status[rhport] = 0;
-			dum->resuming = 0;
-			break;
-		case USB_PORT_FEAT_C_RESET:
-			usbip_dbg_vhci_rh(
-				" ClearPortFeature: USB_PORT_FEAT_C_RESET\n");
-			switch (dum->vdev[rhport].speed) {
-			case USB_SPEED_HIGH:
-				dum->port_status[rhport] |=
-					USB_PORT_STAT_HIGH_SPEED;
-				break;
-			case USB_SPEED_LOW:
-				dum->port_status[rhport] |=
-					USB_PORT_STAT_LOW_SPEED;
-				break;
-			default:
-				break;
-			}
-		default:
-			usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n",
-					  wValue);
-			dum->port_status[rhport] &= ~(1 << wValue);
-			break;
-		}
-		break;
-	case GetHubDescriptor:
-		usbip_dbg_vhci_rh(" GetHubDescriptor\n");
-		hub_descriptor((struct usb_hub_descriptor *) buf);
-		break;
-	case GetHubStatus:
-		usbip_dbg_vhci_rh(" GetHubStatus\n");
-		*(__le32 *) buf = cpu_to_le32(0);
-		break;
-	case GetPortStatus:
-		usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex);
-		if (wIndex > VHCI_NPORTS || wIndex < 1) {
-			pr_err("invalid port number %d\n", wIndex);
-			retval = -EPIPE;
-		}
-
-		/* we do not care about resume. */
-
-		/* whoever resets or resumes must GetPortStatus to
-		 * complete it!!
-		 */
-		if (dum->resuming && time_after(jiffies, dum->re_timeout)) {
-			dum->port_status[rhport] |=
-				(1 << USB_PORT_FEAT_C_SUSPEND);
-			dum->port_status[rhport] &=
-				~(1 << USB_PORT_FEAT_SUSPEND);
-			dum->resuming = 0;
-			dum->re_timeout = 0;
-		}
-
-		if ((dum->port_status[rhport] & (1 << USB_PORT_FEAT_RESET)) !=
-		    0 && time_after(jiffies, dum->re_timeout)) {
-			dum->port_status[rhport] |=
-				(1 << USB_PORT_FEAT_C_RESET);
-			dum->port_status[rhport] &=
-				~(1 << USB_PORT_FEAT_RESET);
-			dum->re_timeout = 0;
-
-			if (dum->vdev[rhport].ud.status ==
-			    VDEV_ST_NOTASSIGNED) {
-				usbip_dbg_vhci_rh(
-					" enable rhport %d (status %u)\n",
-					rhport,
-					dum->vdev[rhport].ud.status);
-				dum->port_status[rhport] |=
-					USB_PORT_STAT_ENABLE;
-			}
-		}
-		((__le16 *) buf)[0] = cpu_to_le16(dum->port_status[rhport]);
-		((__le16 *) buf)[1] =
-			cpu_to_le16(dum->port_status[rhport] >> 16);
-
-		usbip_dbg_vhci_rh(" GetPortStatus bye %x %x\n", ((u16 *)buf)[0],
-				  ((u16 *)buf)[1]);
-		break;
-	case SetHubFeature:
-		usbip_dbg_vhci_rh(" SetHubFeature\n");
-		retval = -EPIPE;
-		break;
-	case SetPortFeature:
-		switch (wValue) {
-		case USB_PORT_FEAT_SUSPEND:
-			usbip_dbg_vhci_rh(
-				" SetPortFeature: USB_PORT_FEAT_SUSPEND\n");
-			break;
-		case USB_PORT_FEAT_RESET:
-			usbip_dbg_vhci_rh(
-				" SetPortFeature: USB_PORT_FEAT_RESET\n");
-			/* if it's already running, disconnect first */
-			if (dum->port_status[rhport] & USB_PORT_STAT_ENABLE) {
-				dum->port_status[rhport] &=
-					~(USB_PORT_STAT_ENABLE |
-					  USB_PORT_STAT_LOW_SPEED |
-					  USB_PORT_STAT_HIGH_SPEED);
-				/* FIXME test that code path! */
-			}
-			/* 50msec reset signaling */
-			dum->re_timeout = jiffies + msecs_to_jiffies(50);
-
-			/* FALLTHROUGH */
-		default:
-			usbip_dbg_vhci_rh(" SetPortFeature: default %d\n",
-					  wValue);
-			dum->port_status[rhport] |= (1 << wValue);
-			break;
-		}
-		break;
-
-	default:
-		pr_err("default: no such request\n");
-
-		/* "protocol stall" on error */
-		retval = -EPIPE;
-	}
-
-	if (usbip_dbg_flag_vhci_rh) {
-		pr_debug("port %d\n", rhport);
-		/* Only dump valid port status */
-		if (rhport >= 0) {
-			dump_port_status_diff(prev_port_status[rhport],
-					      dum->port_status[rhport]);
-		}
-	}
-	usbip_dbg_vhci_rh(" bye\n");
-
-	spin_unlock(&dum->lock);
-
-	return retval;
-}
-
-static struct vhci_device *get_vdev(struct usb_device *udev)
-{
-	int i;
-
-	if (!udev)
-		return NULL;
-
-	for (i = 0; i < VHCI_NPORTS; i++)
-		if (the_controller->vdev[i].udev == udev)
-			return port_to_vdev(i);
-
-	return NULL;
-}
-
-static void vhci_tx_urb(struct urb *urb)
-{
-	struct vhci_device *vdev = get_vdev(urb->dev);
-	struct vhci_priv *priv;
-
-	if (!vdev) {
-		pr_err("could not get virtual device");
-		return;
-	}
-
-	priv = kzalloc(sizeof(struct vhci_priv), GFP_ATOMIC);
-	if (!priv) {
-		usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
-		return;
-	}
-
-	spin_lock(&vdev->priv_lock);
-
-	priv->seqnum = atomic_inc_return(&the_controller->seqnum);
-	if (priv->seqnum == 0xffff)
-		dev_info(&urb->dev->dev, "seqnum max\n");
-
-	priv->vdev = vdev;
-	priv->urb = urb;
-
-	urb->hcpriv = (void *) priv;
-
-	list_add_tail(&priv->list, &vdev->priv_tx);
-
-	wake_up(&vdev->waitq_tx);
-	spin_unlock(&vdev->priv_lock);
-}
-
-static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
-			    gfp_t mem_flags)
-{
-	struct device *dev = &urb->dev->dev;
-	int ret = 0;
-	struct vhci_device *vdev;
-
-	usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
-			  hcd, urb, mem_flags);
-
-	/* patch to usb_sg_init() is in 2.5.60 */
-	BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length);
-
-	spin_lock(&the_controller->lock);
-
-	if (urb->status != -EINPROGRESS) {
-		dev_err(dev, "URB already unlinked!, status %d\n", urb->status);
-		spin_unlock(&the_controller->lock);
-		return urb->status;
-	}
-
-	vdev = port_to_vdev(urb->dev->portnum-1);
-
-	/* refuse enqueue for dead connection */
-	spin_lock(&vdev->ud.lock);
-	if (vdev->ud.status == VDEV_ST_NULL ||
-	    vdev->ud.status == VDEV_ST_ERROR) {
-		dev_err(dev, "enqueue for inactive port %d\n", vdev->rhport);
-		spin_unlock(&vdev->ud.lock);
-		spin_unlock(&the_controller->lock);
-		return -ENODEV;
-	}
-	spin_unlock(&vdev->ud.lock);
-
-	ret = usb_hcd_link_urb_to_ep(hcd, urb);
-	if (ret)
-		goto no_need_unlink;
-
-	/*
-	 * The enumeration process is as follows;
-	 *
-	 *  1. Get_Descriptor request to DevAddrs(0) EndPoint(0)
-	 *     to get max packet length of default pipe
-	 *
-	 *  2. Set_Address request to DevAddr(0) EndPoint(0)
-	 *
-	 */
-	if (usb_pipedevice(urb->pipe) == 0) {
-		__u8 type = usb_pipetype(urb->pipe);
-		struct usb_ctrlrequest *ctrlreq =
-			(struct usb_ctrlrequest *) urb->setup_packet;
-
-		if (type != PIPE_CONTROL || !ctrlreq) {
-			dev_err(dev, "invalid request to devnum 0\n");
-			ret = -EINVAL;
-			goto no_need_xmit;
-		}
-
-		switch (ctrlreq->bRequest) {
-		case USB_REQ_SET_ADDRESS:
-			/* set_address may come when a device is reset */
-			dev_info(dev, "SetAddress Request (%d) to port %d\n",
-				 ctrlreq->wValue, vdev->rhport);
-
-			if (vdev->udev)
-				usb_put_dev(vdev->udev);
-			vdev->udev = usb_get_dev(urb->dev);
-
-			spin_lock(&vdev->ud.lock);
-			vdev->ud.status = VDEV_ST_USED;
-			spin_unlock(&vdev->ud.lock);
-
-			if (urb->status == -EINPROGRESS) {
-				/* This request is successfully completed. */
-				/* If not -EINPROGRESS, possibly unlinked. */
-				urb->status = 0;
-			}
-
-			goto no_need_xmit;
-
-		case USB_REQ_GET_DESCRIPTOR:
-			if (ctrlreq->wValue == cpu_to_le16(USB_DT_DEVICE << 8))
-				usbip_dbg_vhci_hc(
-					"Not yet?:Get_Descriptor to device 0 (get max pipe size)\n");
-
-			if (vdev->udev)
-				usb_put_dev(vdev->udev);
-			vdev->udev = usb_get_dev(urb->dev);
-			goto out;
-
-		default:
-			/* NOT REACHED */
-			dev_err(dev,
-				"invalid request to devnum 0 bRequest %u, wValue %u\n",
-				ctrlreq->bRequest,
-				ctrlreq->wValue);
-			ret =  -EINVAL;
-			goto no_need_xmit;
-		}
-
-	}
-
-out:
-	vhci_tx_urb(urb);
-	spin_unlock(&the_controller->lock);
-
-	return 0;
-
-no_need_xmit:
-	usb_hcd_unlink_urb_from_ep(hcd, urb);
-no_need_unlink:
-	spin_unlock(&the_controller->lock);
-	usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
-	return ret;
-}
-
-/*
- * vhci_rx gives back the urb after receiving the reply of the urb.  If an
- * unlink pdu is sent or not, vhci_rx receives a normal return pdu and gives
- * back its urb. For the driver unlinking the urb, the content of the urb is
- * not important, but the calling to its completion handler is important; the
- * completion of unlinking is notified by the completion handler.
- *
- *
- * CLIENT SIDE
- *
- * - When vhci_hcd receives RET_SUBMIT,
- *
- *	- case 1a). the urb of the pdu is not unlinking.
- *		- normal case
- *		=> just give back the urb
- *
- *	- case 1b). the urb of the pdu is unlinking.
- *		- usbip.ko will return a reply of the unlinking request.
- *		=> give back the urb now and go to case 2b).
- *
- * - When vhci_hcd receives RET_UNLINK,
- *
- *	- case 2a). a submit request is still pending in vhci_hcd.
- *		- urb was really pending in usbip.ko and urb_unlink_urb() was
- *		  completed there.
- *		=> free a pending submit request
- *		=> notify unlink completeness by giving back the urb
- *
- *	- case 2b). a submit request is *not* pending in vhci_hcd.
- *		- urb was already given back to the core driver.
- *		=> do not give back the urb
- *
- *
- * SERVER SIDE
- *
- * - When usbip receives CMD_UNLINK,
- *
- *	- case 3a). the urb of the unlink request is now in submission.
- *		=> do usb_unlink_urb().
- *		=> after the unlink is completed, send RET_UNLINK.
- *
- *	- case 3b). the urb of the unlink request is not in submission.
- *		- may be already completed or never be received
- *		=> send RET_UNLINK
- *
- */
-static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
-{
-	struct vhci_priv *priv;
-	struct vhci_device *vdev;
-
-	pr_info("dequeue a urb %p\n", urb);
-
-	spin_lock(&the_controller->lock);
-
-	priv = urb->hcpriv;
-	if (!priv) {
-		/* URB was never linked! or will be soon given back by
-		 * vhci_rx. */
-		spin_unlock(&the_controller->lock);
-		return 0;
-	}
-
-	{
-		int ret = 0;
-
-		ret = usb_hcd_check_unlink_urb(hcd, urb, status);
-		if (ret) {
-			spin_unlock(&the_controller->lock);
-			return ret;
-		}
-	}
-
-	 /* send unlink request here? */
-	vdev = priv->vdev;
-
-	if (!vdev->ud.tcp_socket) {
-		/* tcp connection is closed */
-		spin_lock(&vdev->priv_lock);
-
-		pr_info("device %p seems to be disconnected\n", vdev);
-		list_del(&priv->list);
-		kfree(priv);
-		urb->hcpriv = NULL;
-
-		spin_unlock(&vdev->priv_lock);
-
-		/*
-		 * If tcp connection is alive, we have sent CMD_UNLINK.
-		 * vhci_rx will receive RET_UNLINK and give back the URB.
-		 * Otherwise, we give back it here.
-		 */
-		pr_info("gives back urb %p\n", urb);
-
-		usb_hcd_unlink_urb_from_ep(hcd, urb);
-
-		spin_unlock(&the_controller->lock);
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
-		spin_lock(&the_controller->lock);
-
-	} else {
-		/* tcp connection is alive */
-		struct vhci_unlink *unlink;
-
-		spin_lock(&vdev->priv_lock);
-
-		/* setup CMD_UNLINK pdu */
-		unlink = kzalloc(sizeof(struct vhci_unlink), GFP_ATOMIC);
-		if (!unlink) {
-			spin_unlock(&vdev->priv_lock);
-			spin_unlock(&the_controller->lock);
-			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
-			return -ENOMEM;
-		}
-
-		unlink->seqnum = atomic_inc_return(&the_controller->seqnum);
-		if (unlink->seqnum == 0xffff)
-			pr_info("seqnum max\n");
-
-		unlink->unlink_seqnum = priv->seqnum;
-
-		pr_info("device %p seems to be still connected\n", vdev);
-
-		/* send cmd_unlink and try to cancel the pending URB in the
-		 * peer */
-		list_add_tail(&unlink->list, &vdev->unlink_tx);
-		wake_up(&vdev->waitq_tx);
-
-		spin_unlock(&vdev->priv_lock);
-	}
-
-	spin_unlock(&the_controller->lock);
-
-	usbip_dbg_vhci_hc("leave\n");
-	return 0;
-}
-
-static void vhci_device_unlink_cleanup(struct vhci_device *vdev)
-{
-	struct vhci_unlink *unlink, *tmp;
-
-	spin_lock(&the_controller->lock);
-	spin_lock(&vdev->priv_lock);
-
-	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
-		pr_info("unlink cleanup tx %lu\n", unlink->unlink_seqnum);
-		list_del(&unlink->list);
-		kfree(unlink);
-	}
-
-	while (!list_empty(&vdev->unlink_rx)) {
-		struct urb *urb;
-
-		unlink = list_first_entry(&vdev->unlink_rx, struct vhci_unlink,
-			list);
-
-		/* give back URB of unanswered unlink request */
-		pr_info("unlink cleanup rx %lu\n", unlink->unlink_seqnum);
-
-		urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum);
-		if (!urb) {
-			pr_info("the urb (seqnum %lu) was already given back\n",
-				unlink->unlink_seqnum);
-			list_del(&unlink->list);
-			kfree(unlink);
-			continue;
-		}
-
-		urb->status = -ENODEV;
-
-		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
-
-		list_del(&unlink->list);
-
-		spin_unlock(&vdev->priv_lock);
-		spin_unlock(&the_controller->lock);
-
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
-
-		spin_lock(&the_controller->lock);
-		spin_lock(&vdev->priv_lock);
-
-		kfree(unlink);
-	}
-
-	spin_unlock(&vdev->priv_lock);
-	spin_unlock(&the_controller->lock);
-}
-
-/*
- * The important thing is that only one context begins cleanup.
- * This is why error handling and cleanup become simple.
- * We do not want to consider race condition as possible.
- */
-static void vhci_shutdown_connection(struct usbip_device *ud)
-{
-	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
-
-	/* need this? see stub_dev.c */
-	if (ud->tcp_socket) {
-		pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
-		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
-	}
-
-	/* kill threads related to this sdev */
-	if (vdev->ud.tcp_rx) {
-		kthread_stop_put(vdev->ud.tcp_rx);
-		vdev->ud.tcp_rx = NULL;
-	}
-	if (vdev->ud.tcp_tx) {
-		kthread_stop_put(vdev->ud.tcp_tx);
-		vdev->ud.tcp_tx = NULL;
-	}
-	pr_info("stop threads\n");
-
-	/* active connection is closed */
-	if (vdev->ud.tcp_socket) {
-		sockfd_put(vdev->ud.tcp_socket);
-		vdev->ud.tcp_socket = NULL;
-	}
-	pr_info("release socket\n");
-
-	vhci_device_unlink_cleanup(vdev);
-
-	/*
-	 * rh_port_disconnect() is a trigger of ...
-	 *   usb_disable_device():
-	 *	disable all the endpoints for a USB device.
-	 *   usb_disable_endpoint():
-	 *	disable endpoints. pending urbs are unlinked(dequeued).
-	 *
-	 * NOTE: After calling rh_port_disconnect(), the USB device drivers of a
-	 * detached device should release used urbs in a cleanup function (i.e.
-	 * xxx_disconnect()). Therefore, vhci_hcd does not need to release
-	 * pushed urbs and their private data in this function.
-	 *
-	 * NOTE: vhci_dequeue() must be considered carefully. When shutting down
-	 * a connection, vhci_shutdown_connection() expects vhci_dequeue()
-	 * gives back pushed urbs and frees their private data by request of
-	 * the cleanup function of a USB driver. When unlinking a urb with an
-	 * active connection, vhci_dequeue() does not give back the urb which
-	 * is actually given back by vhci_rx after receiving its return pdu.
-	 *
-	 */
-	rh_port_disconnect(vdev->rhport);
-
-	pr_info("disconnect device\n");
-}
-
-
-static void vhci_device_reset(struct usbip_device *ud)
-{
-	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
-
-	spin_lock(&ud->lock);
-
-	vdev->speed  = 0;
-	vdev->devid  = 0;
-
-	if (vdev->udev)
-		usb_put_dev(vdev->udev);
-	vdev->udev = NULL;
-
-	if (ud->tcp_socket) {
-		sockfd_put(ud->tcp_socket);
-		ud->tcp_socket = NULL;
-	}
-	ud->status = VDEV_ST_NULL;
-
-	spin_unlock(&ud->lock);
-}
-
-static void vhci_device_unusable(struct usbip_device *ud)
-{
-	spin_lock(&ud->lock);
-	ud->status = VDEV_ST_ERROR;
-	spin_unlock(&ud->lock);
-}
-
-static void vhci_device_init(struct vhci_device *vdev)
-{
-	memset(vdev, 0, sizeof(*vdev));
-
-	vdev->ud.side   = USBIP_VHCI;
-	vdev->ud.status = VDEV_ST_NULL;
-	spin_lock_init(&vdev->ud.lock);
-
-	INIT_LIST_HEAD(&vdev->priv_rx);
-	INIT_LIST_HEAD(&vdev->priv_tx);
-	INIT_LIST_HEAD(&vdev->unlink_tx);
-	INIT_LIST_HEAD(&vdev->unlink_rx);
-	spin_lock_init(&vdev->priv_lock);
-
-	init_waitqueue_head(&vdev->waitq_tx);
-
-	vdev->ud.eh_ops.shutdown = vhci_shutdown_connection;
-	vdev->ud.eh_ops.reset = vhci_device_reset;
-	vdev->ud.eh_ops.unusable = vhci_device_unusable;
-
-	usbip_start_eh(&vdev->ud);
-}
-
-static int vhci_start(struct usb_hcd *hcd)
-{
-	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-	int rhport;
-	int err = 0;
-
-	usbip_dbg_vhci_hc("enter vhci_start\n");
-
-	/* initialize private data of usb_hcd */
-
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
-		struct vhci_device *vdev = &vhci->vdev[rhport];
-
-		vhci_device_init(vdev);
-		vdev->rhport = rhport;
-	}
-
-	atomic_set(&vhci->seqnum, 0);
-	spin_lock_init(&vhci->lock);
-
-	hcd->power_budget = 0; /* no limit */
-	hcd->uses_new_polling = 1;
-
-	/* vhci_hcd is now ready to be controlled through sysfs */
-	err = sysfs_create_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
-	if (err) {
-		pr_err("create sysfs files\n");
-		return err;
-	}
-
-	return 0;
-}
-
-static void vhci_stop(struct usb_hcd *hcd)
-{
-	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-	int rhport = 0;
-
-	usbip_dbg_vhci_hc("stop VHCI controller\n");
-
-	/* 1. remove the userland interface of vhci_hcd */
-	sysfs_remove_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
-
-	/* 2. shutdown all the ports of vhci_hcd */
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
-		struct vhci_device *vdev = &vhci->vdev[rhport];
-
-		usbip_event_add(&vdev->ud, VDEV_EVENT_REMOVED);
-		usbip_stop_eh(&vdev->ud);
-	}
-}
-
-static int vhci_get_frame_number(struct usb_hcd *hcd)
-{
-	pr_err("Not yet implemented\n");
-	return 0;
-}
-
-#ifdef CONFIG_PM
-
-/* FIXME: suspend/resume */
-static int vhci_bus_suspend(struct usb_hcd *hcd)
-{
-	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-
-	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
-
-	spin_lock(&vhci->lock);
-	hcd->state = HC_STATE_SUSPENDED;
-	spin_unlock(&vhci->lock);
-
-	return 0;
-}
-
-static int vhci_bus_resume(struct usb_hcd *hcd)
-{
-	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-	int rc = 0;
-
-	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
-
-	spin_lock(&vhci->lock);
-	if (!HCD_HW_ACCESSIBLE(hcd))
-		rc = -ESHUTDOWN;
-	else
-		hcd->state = HC_STATE_RUNNING;
-	spin_unlock(&vhci->lock);
-
-	return rc;
-}
-
-#else
-
-#define vhci_bus_suspend      NULL
-#define vhci_bus_resume       NULL
-#endif
-
-static struct hc_driver vhci_hc_driver = {
-	.description	= driver_name,
-	.product_desc	= driver_desc,
-	.hcd_priv_size	= sizeof(struct vhci_hcd),
-
-	.flags		= HCD_USB2,
-
-	.start		= vhci_start,
-	.stop		= vhci_stop,
-
-	.urb_enqueue	= vhci_urb_enqueue,
-	.urb_dequeue	= vhci_urb_dequeue,
-
-	.get_frame_number = vhci_get_frame_number,
-
-	.hub_status_data = vhci_hub_status,
-	.hub_control    = vhci_hub_control,
-	.bus_suspend	= vhci_bus_suspend,
-	.bus_resume	= vhci_bus_resume,
-};
-
-static int vhci_hcd_probe(struct platform_device *pdev)
-{
-	struct usb_hcd		*hcd;
-	int			ret;
-
-	usbip_dbg_vhci_hc("name %s id %d\n", pdev->name, pdev->id);
-
-	/*
-	 * Allocate and initialize hcd.
-	 * Our private data is also allocated automatically.
-	 */
-	hcd = usb_create_hcd(&vhci_hc_driver, &pdev->dev, dev_name(&pdev->dev));
-	if (!hcd) {
-		pr_err("create hcd failed\n");
-		return -ENOMEM;
-	}
-	hcd->has_tt = 1;
-
-	/* this is private data for vhci_hcd */
-	the_controller = hcd_to_vhci(hcd);
-
-	/*
-	 * Finish generic HCD structure initialization and register.
-	 * Call the driver's reset() and start() routines.
-	 */
-	ret = usb_add_hcd(hcd, 0, 0);
-	if (ret != 0) {
-		pr_err("usb_add_hcd failed %d\n", ret);
-		usb_put_hcd(hcd);
-		the_controller = NULL;
-		return ret;
-	}
-
-	usbip_dbg_vhci_hc("bye\n");
-	return 0;
-}
-
-static int vhci_hcd_remove(struct platform_device *pdev)
-{
-	struct usb_hcd	*hcd;
-
-	hcd = platform_get_drvdata(pdev);
-	if (!hcd)
-		return 0;
-
-	/*
-	 * Disconnects the root hub,
-	 * then reverses the effects of usb_add_hcd(),
-	 * invoking the HCD's stop() methods.
-	 */
-	usb_remove_hcd(hcd);
-	usb_put_hcd(hcd);
-	the_controller = NULL;
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-
-/* what should happen for USB/IP under suspend/resume? */
-static int vhci_hcd_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct usb_hcd *hcd;
-	int rhport = 0;
-	int connected = 0;
-	int ret = 0;
-
-	hcd = platform_get_drvdata(pdev);
-
-	spin_lock(&the_controller->lock);
-
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++)
-		if (the_controller->port_status[rhport] &
-		    USB_PORT_STAT_CONNECTION)
-			connected += 1;
-
-	spin_unlock(&the_controller->lock);
-
-	if (connected > 0) {
-		dev_info(&pdev->dev,
-			 "We have %d active connection%s. Do not suspend.\n",
-			 connected, (connected == 1 ? "" : "s"));
-		ret =  -EBUSY;
-	} else {
-		dev_info(&pdev->dev, "suspend vhci_hcd");
-		clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
-	}
-
-	return ret;
-}
-
-static int vhci_hcd_resume(struct platform_device *pdev)
-{
-	struct usb_hcd *hcd;
-
-	dev_dbg(&pdev->dev, "%s\n", __func__);
-
-	hcd = platform_get_drvdata(pdev);
-	set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
-	usb_hcd_poll_rh_status(hcd);
-
-	return 0;
-}
-
-#else
-
-#define vhci_hcd_suspend	NULL
-#define vhci_hcd_resume		NULL
-
-#endif
-
-static struct platform_driver vhci_driver = {
-	.probe	= vhci_hcd_probe,
-	.remove	= vhci_hcd_remove,
-	.suspend = vhci_hcd_suspend,
-	.resume	= vhci_hcd_resume,
-	.driver	= {
-		.name = driver_name,
-		.owner = THIS_MODULE,
-	},
-};
-
-/*
- * The VHCI 'device' is 'virtual'; not a real plug&play hardware.
- * We need to add this virtual device as a platform device arbitrarily:
- *	1. platform_device_register()
- */
-static void the_pdev_release(struct device *dev)
-{
-}
-
-static struct platform_device the_pdev = {
-	/* should be the same name as driver_name */
-	.name = driver_name,
-	.id = -1,
-	.dev = {
-		.release = the_pdev_release,
-	},
-};
-
-static int __init vhci_hcd_init(void)
-{
-	int ret;
-
-	if (usb_disabled())
-		return -ENODEV;
-
-	ret = platform_driver_register(&vhci_driver);
-	if (ret)
-		goto err_driver_register;
-
-	ret = platform_device_register(&the_pdev);
-	if (ret)
-		goto err_platform_device_register;
-
-	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
-	return ret;
-
-err_platform_device_register:
-	platform_driver_unregister(&vhci_driver);
-err_driver_register:
-	return ret;
-}
-
-static void __exit vhci_hcd_exit(void)
-{
-	platform_device_unregister(&the_pdev);
-	platform_driver_unregister(&vhci_driver);
-}
-
-module_init(vhci_hcd_init);
-module_exit(vhci_hcd_exit);
-
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/staging/usbip/vhci_rx.c b/drivers/staging/usbip/vhci_rx.c
deleted file mode 100644
index 00e4a54308e4..000000000000
--- a/drivers/staging/usbip/vhci_rx.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/slab.h>
-
-#include "usbip_common.h"
-#include "vhci.h"
-
-/* get URB from transmitted urb queue. caller must hold vdev->priv_lock */
-struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
-{
-	struct vhci_priv *priv, *tmp;
-	struct urb *urb = NULL;
-	int status;
-
-	list_for_each_entry_safe(priv, tmp, &vdev->priv_rx, list) {
-		if (priv->seqnum != seqnum)
-			continue;
-
-		urb = priv->urb;
-		status = urb->status;
-
-		usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
-				urb, priv, seqnum);
-
-		switch (status) {
-		case -ENOENT:
-			/* fall through */
-		case -ECONNRESET:
-			dev_info(&urb->dev->dev,
-				 "urb %p was unlinked %ssynchronuously.\n", urb,
-				 status == -ENOENT ? "" : "a");
-			break;
-		case -EINPROGRESS:
-			/* no info output */
-			break;
-		default:
-			dev_info(&urb->dev->dev,
-				 "urb %p may be in a error, status %d\n", urb,
-				 status);
-		}
-
-		list_del(&priv->list);
-		kfree(priv);
-		urb->hcpriv = NULL;
-
-		break;
-	}
-
-	return urb;
-}
-
-static void vhci_recv_ret_submit(struct vhci_device *vdev,
-				 struct usbip_header *pdu)
-{
-	struct usbip_device *ud = &vdev->ud;
-	struct urb *urb;
-
-	spin_lock(&vdev->priv_lock);
-	urb = pickup_urb_and_free_priv(vdev, pdu->base.seqnum);
-	spin_unlock(&vdev->priv_lock);
-
-	if (!urb) {
-		pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
-		pr_info("max seqnum %d\n",
-			atomic_read(&the_controller->seqnum));
-		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-		return;
-	}
-
-	/* unpack the pdu to a urb */
-	usbip_pack_pdu(pdu, urb, USBIP_RET_SUBMIT, 0);
-
-	/* recv transfer buffer */
-	if (usbip_recv_xbuff(ud, urb) < 0)
-		return;
-
-	/* recv iso_packet_descriptor */
-	if (usbip_recv_iso(ud, urb) < 0)
-		return;
-
-	/* restore the padding in iso packets */
-	usbip_pad_iso(ud, urb);
-
-	if (usbip_dbg_flag_vhci_rx)
-		usbip_dump_urb(urb);
-
-	usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
-
-	spin_lock(&the_controller->lock);
-	usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
-	spin_unlock(&the_controller->lock);
-
-	usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
-
-	usbip_dbg_vhci_rx("Leave\n");
-}
-
-static struct vhci_unlink *dequeue_pending_unlink(struct vhci_device *vdev,
-						  struct usbip_header *pdu)
-{
-	struct vhci_unlink *unlink, *tmp;
-
-	spin_lock(&vdev->priv_lock);
-
-	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_rx, list) {
-		pr_info("unlink->seqnum %lu\n", unlink->seqnum);
-		if (unlink->seqnum == pdu->base.seqnum) {
-			usbip_dbg_vhci_rx("found pending unlink, %lu\n",
-					  unlink->seqnum);
-			list_del(&unlink->list);
-
-			spin_unlock(&vdev->priv_lock);
-			return unlink;
-		}
-	}
-
-	spin_unlock(&vdev->priv_lock);
-
-	return NULL;
-}
-
-static void vhci_recv_ret_unlink(struct vhci_device *vdev,
-				 struct usbip_header *pdu)
-{
-	struct vhci_unlink *unlink;
-	struct urb *urb;
-
-	usbip_dump_header(pdu);
-
-	unlink = dequeue_pending_unlink(vdev, pdu);
-	if (!unlink) {
-		pr_info("cannot find the pending unlink %u\n",
-			pdu->base.seqnum);
-		return;
-	}
-
-	spin_lock(&vdev->priv_lock);
-	urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum);
-	spin_unlock(&vdev->priv_lock);
-
-	if (!urb) {
-		/*
-		 * I get the result of a unlink request. But, it seems that I
-		 * already received the result of its submit result and gave
-		 * back the URB.
-		 */
-		pr_info("the urb (seqnum %d) was already given back\n",
-			pdu->base.seqnum);
-	} else {
-		usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
-
-		/* If unlink is successful, status is -ECONNRESET */
-		urb->status = pdu->u.ret_unlink.status;
-		pr_info("urb->status %d\n", urb->status);
-
-		spin_lock(&the_controller->lock);
-		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
-		spin_unlock(&the_controller->lock);
-
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
-	}
-
-	kfree(unlink);
-}
-
-static int vhci_priv_tx_empty(struct vhci_device *vdev)
-{
-	int empty = 0;
-
-	spin_lock(&vdev->priv_lock);
-	empty = list_empty(&vdev->priv_rx);
-	spin_unlock(&vdev->priv_lock);
-
-	return empty;
-}
-
-/* recv a pdu */
-static void vhci_rx_pdu(struct usbip_device *ud)
-{
-	int ret;
-	struct usbip_header pdu;
-	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
-
-	usbip_dbg_vhci_rx("Enter\n");
-
-	memset(&pdu, 0, sizeof(pdu));
-
-	/* receive a pdu header */
-	ret = usbip_recv(ud->tcp_socket, &pdu, sizeof(pdu));
-	if (ret < 0) {
-		if (ret == -ECONNRESET)
-			pr_info("connection reset by peer\n");
-		else if (ret == -EAGAIN) {
-			/* ignore if connection was idle */
-			if (vhci_priv_tx_empty(vdev))
-				return;
-			pr_info("connection timed out with pending urbs\n");
-		} else if (ret != -ERESTARTSYS)
-			pr_info("xmit failed %d\n", ret);
-
-		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-		return;
-	}
-	if (ret == 0) {
-		pr_info("connection closed");
-		usbip_event_add(ud, VDEV_EVENT_DOWN);
-		return;
-	}
-	if (ret != sizeof(pdu)) {
-		pr_err("received pdu size is %d, should be %d\n", ret,
-		       (unsigned int)sizeof(pdu));
-		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-		return;
-	}
-
-	usbip_header_correct_endian(&pdu, 0);
-
-	if (usbip_dbg_flag_vhci_rx)
-		usbip_dump_header(&pdu);
-
-	switch (pdu.base.command) {
-	case USBIP_RET_SUBMIT:
-		vhci_recv_ret_submit(vdev, &pdu);
-		break;
-	case USBIP_RET_UNLINK:
-		vhci_recv_ret_unlink(vdev, &pdu);
-		break;
-	default:
-		/* NOT REACHED */
-		pr_err("unknown pdu %u\n", pdu.base.command);
-		usbip_dump_header(&pdu);
-		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
-		break;
-	}
-}
-
-int vhci_rx_loop(void *data)
-{
-	struct usbip_device *ud = data;
-
-	while (!kthread_should_stop()) {
-		if (usbip_event_happened(ud))
-			break;
-
-		vhci_rx_pdu(ud);
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c
deleted file mode 100644
index 211f43f67ea2..000000000000
--- a/drivers/staging/usbip/vhci_sysfs.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/file.h>
-#include <linux/net.h>
-
-#include "usbip_common.h"
-#include "vhci.h"
-
-/* TODO: refine locking ?*/
-
-/* Sysfs entry to show port status */
-static ssize_t status_show(struct device *dev, struct device_attribute *attr,
-			   char *out)
-{
-	char *s = out;
-	int i = 0;
-
-	BUG_ON(!the_controller || !out);
-
-	spin_lock(&the_controller->lock);
-
-	/*
-	 * output example:
-	 * prt sta spd dev socket           local_busid
-	 * 000 004 000 000         c5a7bb80 1-2.3
-	 * 001 004 000 000         d8cee980 2-3.4
-	 *
-	 * IP address can be retrieved from a socket pointer address by looking
-	 * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
-	 * port number and its peer IP address.
-	 */
-	out += sprintf(out,
-		       "prt sta spd bus dev socket           local_busid\n");
-
-	for (i = 0; i < VHCI_NPORTS; i++) {
-		struct vhci_device *vdev = port_to_vdev(i);
-
-		spin_lock(&vdev->ud.lock);
-		out += sprintf(out, "%03u %03u ", i, vdev->ud.status);
-
-		if (vdev->ud.status == VDEV_ST_USED) {
-			out += sprintf(out, "%03u %08x ",
-				       vdev->speed, vdev->devid);
-			out += sprintf(out, "%16p ", vdev->ud.tcp_socket);
-			out += sprintf(out, "%s", dev_name(&vdev->udev->dev));
-
-		} else {
-			out += sprintf(out, "000 000 000 0000000000000000 0-0");
-		}
-
-		out += sprintf(out, "\n");
-		spin_unlock(&vdev->ud.lock);
-	}
-
-	spin_unlock(&the_controller->lock);
-
-	return out - s;
-}
-static DEVICE_ATTR_RO(status);
-
-/* Sysfs entry to shutdown a virtual connection */
-static int vhci_port_disconnect(__u32 rhport)
-{
-	struct vhci_device *vdev;
-
-	usbip_dbg_vhci_sysfs("enter\n");
-
-	/* lock */
-	spin_lock(&the_controller->lock);
-
-	vdev = port_to_vdev(rhport);
-
-	spin_lock(&vdev->ud.lock);
-	if (vdev->ud.status == VDEV_ST_NULL) {
-		pr_err("not connected %d\n", vdev->ud.status);
-
-		/* unlock */
-		spin_unlock(&vdev->ud.lock);
-		spin_unlock(&the_controller->lock);
-
-		return -EINVAL;
-	}
-
-	/* unlock */
-	spin_unlock(&vdev->ud.lock);
-	spin_unlock(&the_controller->lock);
-
-	usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN);
-
-	return 0;
-}
-
-static ssize_t store_detach(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
-{
-	int err;
-	__u32 rhport = 0;
-
-	if (sscanf(buf, "%u", &rhport) != 1)
-		return -EINVAL;
-
-	/* check rhport */
-	if (rhport >= VHCI_NPORTS) {
-		dev_err(dev, "invalid port %u\n", rhport);
-		return -EINVAL;
-	}
-
-	err = vhci_port_disconnect(rhport);
-	if (err < 0)
-		return -EINVAL;
-
-	usbip_dbg_vhci_sysfs("Leave\n");
-
-	return count;
-}
-static DEVICE_ATTR(detach, S_IWUSR, NULL, store_detach);
-
-/* Sysfs entry to establish a virtual connection */
-static int valid_args(__u32 rhport, enum usb_device_speed speed)
-{
-	/* check rhport */
-	if (rhport >= VHCI_NPORTS) {
-		pr_err("port %u\n", rhport);
-		return -EINVAL;
-	}
-
-	/* check speed */
-	switch (speed) {
-	case USB_SPEED_LOW:
-	case USB_SPEED_FULL:
-	case USB_SPEED_HIGH:
-	case USB_SPEED_WIRELESS:
-		break;
-	default:
-		pr_err("Failed attach request for unsupported USB speed: %s\n",
-			usb_speed_string(speed));
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * To start a new USB/IP attachment, a userland program needs to setup a TCP
- * connection and then write its socket descriptor with remote device
- * information into this sysfs file.
- *
- * A remote device is virtually attached to the root-hub port of @rhport with
- * @speed. @devid is embedded into a request to specify the remote device in a
- * server host.
- *
- * write() returns 0 on success, else negative errno.
- */
-static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
-{
-	struct vhci_device *vdev;
-	struct socket *socket;
-	int sockfd = 0;
-	__u32 rhport = 0, devid = 0, speed = 0;
-	int err;
-
-	/*
-	 * @rhport: port number of vhci_hcd
-	 * @sockfd: socket descriptor of an established TCP connection
-	 * @devid: unique device identifier in a remote host
-	 * @speed: usb device speed in a remote host
-	 */
-	if (sscanf(buf, "%u %u %u %u", &rhport, &sockfd, &devid, &speed) != 4)
-		return -EINVAL;
-
-	usbip_dbg_vhci_sysfs("rhport(%u) sockfd(%u) devid(%u) speed(%u)\n",
-			     rhport, sockfd, devid, speed);
-
-	/* check received parameters */
-	if (valid_args(rhport, speed) < 0)
-		return -EINVAL;
-
-	/* Extract socket from fd. */
-	socket = sockfd_lookup(sockfd, &err);
-	if (!socket)
-		return -EINVAL;
-
-	/* now need lock until setting vdev status as used */
-
-	/* begin a lock */
-	spin_lock(&the_controller->lock);
-	vdev = port_to_vdev(rhport);
-	spin_lock(&vdev->ud.lock);
-
-	if (vdev->ud.status != VDEV_ST_NULL) {
-		/* end of the lock */
-		spin_unlock(&vdev->ud.lock);
-		spin_unlock(&the_controller->lock);
-
-		sockfd_put(socket);
-
-		dev_err(dev, "port %d already used\n", rhport);
-		return -EINVAL;
-	}
-
-	dev_info(dev,
-		 "rhport(%u) sockfd(%d) devid(%u) speed(%u) speed_str(%s)\n",
-		 rhport, sockfd, devid, speed, usb_speed_string(speed));
-
-	vdev->devid         = devid;
-	vdev->speed         = speed;
-	vdev->ud.tcp_socket = socket;
-	vdev->ud.status     = VDEV_ST_NOTASSIGNED;
-
-	spin_unlock(&vdev->ud.lock);
-	spin_unlock(&the_controller->lock);
-	/* end the lock */
-
-	vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
-	vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx");
-
-	rh_port_connect(rhport, speed);
-
-	return count;
-}
-static DEVICE_ATTR(attach, S_IWUSR, NULL, store_attach);
-
-static struct attribute *dev_attrs[] = {
-	&dev_attr_status.attr,
-	&dev_attr_detach.attr,
-	&dev_attr_attach.attr,
-	&dev_attr_usbip_debug.attr,
-	NULL,
-};
-
-const struct attribute_group dev_attr_group = {
-	.attrs = dev_attrs,
-};
diff --git a/drivers/staging/usbip/vhci_tx.c b/drivers/staging/usbip/vhci_tx.c
deleted file mode 100644
index 409fd99f3257..000000000000
--- a/drivers/staging/usbip/vhci_tx.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (C) 2003-2008 Takahiro Hirofuchi
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
- * USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/slab.h>
-
-#include "usbip_common.h"
-#include "vhci.h"
-
-static void setup_cmd_submit_pdu(struct usbip_header *pdup,  struct urb *urb)
-{
-	struct vhci_priv *priv = ((struct vhci_priv *)urb->hcpriv);
-	struct vhci_device *vdev = priv->vdev;
-
-	usbip_dbg_vhci_tx("URB, local devnum %u, remote devid %u\n",
-			  usb_pipedevice(urb->pipe), vdev->devid);
-
-	pdup->base.command   = USBIP_CMD_SUBMIT;
-	pdup->base.seqnum    = priv->seqnum;
-	pdup->base.devid     = vdev->devid;
-	pdup->base.direction = usb_pipein(urb->pipe) ?
-		USBIP_DIR_IN : USBIP_DIR_OUT;
-	pdup->base.ep	     = usb_pipeendpoint(urb->pipe);
-
-	usbip_pack_pdu(pdup, urb, USBIP_CMD_SUBMIT, 1);
-
-	if (urb->setup_packet)
-		memcpy(pdup->u.cmd_submit.setup, urb->setup_packet, 8);
-}
-
-static struct vhci_priv *dequeue_from_priv_tx(struct vhci_device *vdev)
-{
-	struct vhci_priv *priv, *tmp;
-
-	spin_lock(&vdev->priv_lock);
-
-	list_for_each_entry_safe(priv, tmp, &vdev->priv_tx, list) {
-		list_move_tail(&priv->list, &vdev->priv_rx);
-		spin_unlock(&vdev->priv_lock);
-		return priv;
-	}
-
-	spin_unlock(&vdev->priv_lock);
-
-	return NULL;
-}
-
-static int vhci_send_cmd_submit(struct vhci_device *vdev)
-{
-	struct vhci_priv *priv = NULL;
-
-	struct msghdr msg;
-	struct kvec iov[3];
-	size_t txsize;
-
-	size_t total_size = 0;
-
-	while ((priv = dequeue_from_priv_tx(vdev)) != NULL) {
-		int ret;
-		struct urb *urb = priv->urb;
-		struct usbip_header pdu_header;
-		struct usbip_iso_packet_descriptor *iso_buffer = NULL;
-
-		txsize = 0;
-		memset(&pdu_header, 0, sizeof(pdu_header));
-		memset(&msg, 0, sizeof(msg));
-		memset(&iov, 0, sizeof(iov));
-
-		usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
-
-		/* 1. setup usbip_header */
-		setup_cmd_submit_pdu(&pdu_header, urb);
-		usbip_header_correct_endian(&pdu_header, 1);
-
-		iov[0].iov_base = &pdu_header;
-		iov[0].iov_len  = sizeof(pdu_header);
-		txsize += sizeof(pdu_header);
-
-		/* 2. setup transfer buffer */
-		if (!usb_pipein(urb->pipe) && urb->transfer_buffer_length > 0) {
-			iov[1].iov_base = urb->transfer_buffer;
-			iov[1].iov_len  = urb->transfer_buffer_length;
-			txsize += urb->transfer_buffer_length;
-		}
-
-		/* 3. setup iso_packet_descriptor */
-		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
-			ssize_t len = 0;
-
-			iso_buffer = usbip_alloc_iso_desc_pdu(urb, &len);
-			if (!iso_buffer) {
-				usbip_event_add(&vdev->ud,
-						SDEV_EVENT_ERROR_MALLOC);
-				return -1;
-			}
-
-			iov[2].iov_base = iso_buffer;
-			iov[2].iov_len  = len;
-			txsize += len;
-		}
-
-		ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 3, txsize);
-		if (ret != txsize) {
-			pr_err("sendmsg failed!, ret=%d for %zd\n", ret,
-			       txsize);
-			kfree(iso_buffer);
-			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP);
-			return -1;
-		}
-
-		kfree(iso_buffer);
-		usbip_dbg_vhci_tx("send txdata\n");
-
-		total_size += txsize;
-	}
-
-	return total_size;
-}
-
-static struct vhci_unlink *dequeue_from_unlink_tx(struct vhci_device *vdev)
-{
-	struct vhci_unlink *unlink, *tmp;
-
-	spin_lock(&vdev->priv_lock);
-
-	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
-		list_move_tail(&unlink->list, &vdev->unlink_rx);
-		spin_unlock(&vdev->priv_lock);
-		return unlink;
-	}
-
-	spin_unlock(&vdev->priv_lock);
-
-	return NULL;
-}
-
-static int vhci_send_cmd_unlink(struct vhci_device *vdev)
-{
-	struct vhci_unlink *unlink = NULL;
-
-	struct msghdr msg;
-	struct kvec iov[3];
-	size_t txsize;
-
-	size_t total_size = 0;
-
-	while ((unlink = dequeue_from_unlink_tx(vdev)) != NULL) {
-		int ret;
-		struct usbip_header pdu_header;
-
-		txsize = 0;
-		memset(&pdu_header, 0, sizeof(pdu_header));
-		memset(&msg, 0, sizeof(msg));
-		memset(&iov, 0, sizeof(iov));
-
-		usbip_dbg_vhci_tx("setup cmd unlink, %lu\n", unlink->seqnum);
-
-		/* 1. setup usbip_header */
-		pdu_header.base.command = USBIP_CMD_UNLINK;
-		pdu_header.base.seqnum  = unlink->seqnum;
-		pdu_header.base.devid	= vdev->devid;
-		pdu_header.base.ep	= 0;
-		pdu_header.u.cmd_unlink.seqnum = unlink->unlink_seqnum;
-
-		usbip_header_correct_endian(&pdu_header, 1);
-
-		iov[0].iov_base = &pdu_header;
-		iov[0].iov_len  = sizeof(pdu_header);
-		txsize += sizeof(pdu_header);
-
-		ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 1, txsize);
-		if (ret != txsize) {
-			pr_err("sendmsg failed!, ret=%d for %zd\n", ret,
-			       txsize);
-			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP);
-			return -1;
-		}
-
-		usbip_dbg_vhci_tx("send txdata\n");
-
-		total_size += txsize;
-	}
-
-	return total_size;
-}
-
-int vhci_tx_loop(void *data)
-{
-	struct usbip_device *ud = data;
-	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
-
-	while (!kthread_should_stop()) {
-		if (vhci_send_cmd_submit(vdev) < 0)
-			break;
-
-		if (vhci_send_cmd_unlink(vdev) < 0)
-			break;
-
-		wait_event_interruptible(vdev->waitq_tx,
-					 (!list_empty(&vdev->priv_tx) ||
-					  !list_empty(&vdev->unlink_tx) ||
-					  kthread_should_stop()));
-
-		usbip_dbg_vhci_tx("pending urbs ?, now wake up\n");
-	}
-
-	return 0;
-}
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index e0cad4418085..cf1b19bca306 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -92,6 +92,8 @@ source "drivers/usb/storage/Kconfig"
 
 source "drivers/usb/image/Kconfig"
 
+source "drivers/usb/usbip/Kconfig"
+
 endif
 
 source "drivers/usb/musb/Kconfig"
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 3cba892b83a2..d7be71778059 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -60,3 +60,5 @@ obj-$(CONFIG_USB_RENESAS_USBHS)	+= renesas_usbhs/
 obj-$(CONFIG_USB_GADGET)	+= gadget/
 
 obj-$(CONFIG_USB_COMMON)	+= common/
+
+obj-$(CONFIG_USBIP_CORE)	+= usbip/
diff --git a/drivers/usb/usbip/Kconfig b/drivers/usb/usbip/Kconfig
new file mode 100644
index 000000000000..bd99e9e47e50
--- /dev/null
+++ b/drivers/usb/usbip/Kconfig
@@ -0,0 +1,41 @@
+config USBIP_CORE
+	tristate "USB/IP support"
+	depends on USB && NET
+	---help---
+	  This enables pushing USB packets over IP to allow remote
+	  machines direct access to USB devices. It provides the
+	  USB/IP core that is required by both drivers.
+
+	  For more details, and to get the userspace utility
+	  programs, please see <http://usbip.sourceforge.net/>.
+
+	  To compile this as a module, choose M here: the module will
+	  be called usbip-core.
+
+	  If unsure, say N.
+
+config USBIP_VHCI_HCD
+	tristate "VHCI hcd"
+	depends on USBIP_CORE
+	---help---
+	  This enables the USB/IP virtual host controller driver,
+	  which is run on the remote machine.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vhci-hcd.
+
+config USBIP_HOST
+	tristate "Host driver"
+	depends on USBIP_CORE
+	---help---
+	  This enables the USB/IP host driver, which is run on the
+	  machine that is sharing the USB devices.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called usbip-host.
+
+config USBIP_DEBUG
+	bool "Debug messages for USB/IP"
+	depends on USBIP_CORE
+	---help---
+	  This enables the debug messages from the USB/IP drivers.
diff --git a/drivers/usb/usbip/Makefile b/drivers/usb/usbip/Makefile
new file mode 100644
index 000000000000..9ecd61545be1
--- /dev/null
+++ b/drivers/usb/usbip/Makefile
@@ -0,0 +1,10 @@
+ccflags-$(CONFIG_USBIP_DEBUG) := -DDEBUG
+
+obj-$(CONFIG_USBIP_CORE) += usbip-core.o
+usbip-core-y := usbip_common.o usbip_event.o
+
+obj-$(CONFIG_USBIP_VHCI_HCD) += vhci-hcd.o
+vhci-hcd-y := vhci_sysfs.o vhci_tx.o vhci_rx.o vhci_hcd.o
+
+obj-$(CONFIG_USBIP_HOST) += usbip-host.o
+usbip-host-y := stub_dev.o stub_main.o stub_rx.o stub_tx.o
diff --git a/drivers/usb/usbip/README b/drivers/usb/usbip/README
new file mode 100644
index 000000000000..41a2cf2e77a6
--- /dev/null
+++ b/drivers/usb/usbip/README
@@ -0,0 +1,7 @@
+TODO:
+	- more discussion about the protocol
+	- testing
+	- review of the userspace interface
+	- document the protocol
+
+Please send patches for this code to Greg Kroah-Hartman <greg@kroah.com>
diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h
new file mode 100644
index 000000000000..266e2b0ce9a8
--- /dev/null
+++ b/drivers/usb/usbip/stub.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef __USBIP_STUB_H
+#define __USBIP_STUB_H
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+#include <linux/wait.h>
+
+#define STUB_BUSID_OTHER 0
+#define STUB_BUSID_REMOV 1
+#define STUB_BUSID_ADDED 2
+#define STUB_BUSID_ALLOC 3
+
+struct stub_device {
+	struct usb_interface *interface;
+	struct usb_device *udev;
+
+	struct usbip_device ud;
+	__u32 devid;
+
+	/*
+	 * stub_priv preserves private data of each urb.
+	 * It is allocated as stub_priv_cache and assigned to urb->context.
+	 *
+	 * stub_priv is always linked to any one of 3 lists;
+	 *	priv_init: linked to this until the comletion of a urb.
+	 *	priv_tx  : linked to this after the completion of a urb.
+	 *	priv_free: linked to this after the sending of the result.
+	 *
+	 * Any of these list operations should be locked by priv_lock.
+	 */
+	spinlock_t priv_lock;
+	struct list_head priv_init;
+	struct list_head priv_tx;
+	struct list_head priv_free;
+
+	/* see comments for unlinking in stub_rx.c */
+	struct list_head unlink_tx;
+	struct list_head unlink_free;
+
+	wait_queue_head_t tx_waitq;
+};
+
+/* private data into urb->priv */
+struct stub_priv {
+	unsigned long seqnum;
+	struct list_head list;
+	struct stub_device *sdev;
+	struct urb *urb;
+
+	int unlinking;
+};
+
+struct stub_unlink {
+	unsigned long seqnum;
+	struct list_head list;
+	__u32 status;
+};
+
+/* same as SYSFS_BUS_ID_SIZE */
+#define BUSID_SIZE 32
+
+struct bus_id_priv {
+	char name[BUSID_SIZE];
+	char status;
+	int interf_count;
+	struct stub_device *sdev;
+	struct usb_device *udev;
+	char shutdown_busid;
+};
+
+/* stub_priv is allocated from stub_priv_cache */
+extern struct kmem_cache *stub_priv_cache;
+
+/* stub_dev.c */
+extern struct usb_device_driver stub_driver;
+
+/* stub_main.c */
+struct bus_id_priv *get_busid_priv(const char *busid);
+int del_match_busid(char *busid);
+void stub_device_cleanup_urbs(struct stub_device *sdev);
+
+/* stub_rx.c */
+int stub_rx_loop(void *data);
+
+/* stub_tx.c */
+void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum,
+			     __u32 status);
+void stub_complete(struct urb *urb);
+int stub_tx_loop(void *data);
+
+#endif /* __USBIP_STUB_H */
diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
new file mode 100644
index 000000000000..51d0c7188738
--- /dev/null
+++ b/drivers/usb/usbip/stub_dev.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+
+#include "usbip_common.h"
+#include "stub.h"
+
+/*
+ * Define device IDs here if you want to explicitly limit exportable devices.
+ * In most cases, wildcard matching will be okay because driver binding can be
+ * changed dynamically by a userland program.
+ */
+static struct usb_device_id stub_table[] = {
+#if 0
+	/* just an example */
+	{ USB_DEVICE(0x05ac, 0x0301) },   /* Mac 1 button mouse */
+	{ USB_DEVICE(0x0430, 0x0009) },   /* Plat Home Keyboard */
+	{ USB_DEVICE(0x059b, 0x0001) },   /* Iomega USB Zip 100 */
+	{ USB_DEVICE(0x04b3, 0x4427) },   /* IBM USB CD-ROM */
+	{ USB_DEVICE(0x05a9, 0xa511) },   /* LifeView USB cam */
+	{ USB_DEVICE(0x55aa, 0x0201) },   /* Imation card reader */
+	{ USB_DEVICE(0x046d, 0x0870) },   /* Qcam Express(QV-30) */
+	{ USB_DEVICE(0x04bb, 0x0101) },   /* IO-DATA HD 120GB */
+	{ USB_DEVICE(0x04bb, 0x0904) },   /* IO-DATA USB-ET/TX */
+	{ USB_DEVICE(0x04bb, 0x0201) },   /* IO-DATA USB-ET/TX */
+	{ USB_DEVICE(0x08bb, 0x2702) },   /* ONKYO USB Speaker */
+	{ USB_DEVICE(0x046d, 0x08b2) },   /* Logicool Qcam 4000 Pro */
+#endif
+	/* magic for wild card */
+	{ .driver_info = 1 },
+	{ 0, }                                     /* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, stub_table);
+
+/*
+ * usbip_status shows the status of usbip-host as long as this driver is bound
+ * to the target device.
+ */
+static ssize_t usbip_status_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct stub_device *sdev = dev_get_drvdata(dev);
+	int status;
+
+	if (!sdev) {
+		dev_err(dev, "sdev is null\n");
+		return -ENODEV;
+	}
+
+	spin_lock_irq(&sdev->ud.lock);
+	status = sdev->ud.status;
+	spin_unlock_irq(&sdev->ud.lock);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", status);
+}
+static DEVICE_ATTR_RO(usbip_status);
+
+/*
+ * usbip_sockfd gets a socket descriptor of an established TCP connection that
+ * is used to transfer usbip requests by kernel threads. -1 is a magic number
+ * by which usbip connection is finished.
+ */
+static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct stub_device *sdev = dev_get_drvdata(dev);
+	int sockfd = 0;
+	struct socket *socket;
+	int rv;
+
+	if (!sdev) {
+		dev_err(dev, "sdev is null\n");
+		return -ENODEV;
+	}
+
+	rv = sscanf(buf, "%d", &sockfd);
+	if (rv != 1)
+		return -EINVAL;
+
+	if (sockfd != -1) {
+		int err;
+
+		dev_info(dev, "stub up\n");
+
+		spin_lock_irq(&sdev->ud.lock);
+
+		if (sdev->ud.status != SDEV_ST_AVAILABLE) {
+			dev_err(dev, "not ready\n");
+			goto err;
+		}
+
+		socket = sockfd_lookup(sockfd, &err);
+		if (!socket)
+			goto err;
+
+		sdev->ud.tcp_socket = socket;
+
+		spin_unlock_irq(&sdev->ud.lock);
+
+		sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, &sdev->ud,
+						  "stub_rx");
+		sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, &sdev->ud,
+						  "stub_tx");
+
+		spin_lock_irq(&sdev->ud.lock);
+		sdev->ud.status = SDEV_ST_USED;
+		spin_unlock_irq(&sdev->ud.lock);
+
+	} else {
+		dev_info(dev, "stub down\n");
+
+		spin_lock_irq(&sdev->ud.lock);
+		if (sdev->ud.status != SDEV_ST_USED)
+			goto err;
+
+		spin_unlock_irq(&sdev->ud.lock);
+
+		usbip_event_add(&sdev->ud, SDEV_EVENT_DOWN);
+	}
+
+	return count;
+
+err:
+	spin_unlock_irq(&sdev->ud.lock);
+	return -EINVAL;
+}
+static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd);
+
+static int stub_add_files(struct device *dev)
+{
+	int err = 0;
+
+	err = device_create_file(dev, &dev_attr_usbip_status);
+	if (err)
+		goto err_status;
+
+	err = device_create_file(dev, &dev_attr_usbip_sockfd);
+	if (err)
+		goto err_sockfd;
+
+	err = device_create_file(dev, &dev_attr_usbip_debug);
+	if (err)
+		goto err_debug;
+
+	return 0;
+
+err_debug:
+	device_remove_file(dev, &dev_attr_usbip_sockfd);
+err_sockfd:
+	device_remove_file(dev, &dev_attr_usbip_status);
+err_status:
+	return err;
+}
+
+static void stub_remove_files(struct device *dev)
+{
+	device_remove_file(dev, &dev_attr_usbip_status);
+	device_remove_file(dev, &dev_attr_usbip_sockfd);
+	device_remove_file(dev, &dev_attr_usbip_debug);
+}
+
+static void stub_shutdown_connection(struct usbip_device *ud)
+{
+	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
+
+	/*
+	 * When removing an exported device, kernel panic sometimes occurred
+	 * and then EIP was sk_wait_data of stub_rx thread. Is this because
+	 * sk_wait_data returned though stub_rx thread was already finished by
+	 * step 1?
+	 */
+	if (ud->tcp_socket) {
+		dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
+			ud->tcp_socket);
+		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
+	}
+
+	/* 1. stop threads */
+	if (ud->tcp_rx) {
+		kthread_stop_put(ud->tcp_rx);
+		ud->tcp_rx = NULL;
+	}
+	if (ud->tcp_tx) {
+		kthread_stop_put(ud->tcp_tx);
+		ud->tcp_tx = NULL;
+	}
+
+	/*
+	 * 2. close the socket
+	 *
+	 * tcp_socket is freed after threads are killed so that usbip_xmit does
+	 * not touch NULL socket.
+	 */
+	if (ud->tcp_socket) {
+		sockfd_put(ud->tcp_socket);
+		ud->tcp_socket = NULL;
+	}
+
+	/* 3. free used data */
+	stub_device_cleanup_urbs(sdev);
+
+	/* 4. free stub_unlink */
+	{
+		unsigned long flags;
+		struct stub_unlink *unlink, *tmp;
+
+		spin_lock_irqsave(&sdev->priv_lock, flags);
+		list_for_each_entry_safe(unlink, tmp, &sdev->unlink_tx, list) {
+			list_del(&unlink->list);
+			kfree(unlink);
+		}
+		list_for_each_entry_safe(unlink, tmp, &sdev->unlink_free,
+					 list) {
+			list_del(&unlink->list);
+			kfree(unlink);
+		}
+		spin_unlock_irqrestore(&sdev->priv_lock, flags);
+	}
+}
+
+static void stub_device_reset(struct usbip_device *ud)
+{
+	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
+	struct usb_device *udev = sdev->udev;
+	int ret;
+
+	dev_dbg(&udev->dev, "device reset");
+
+	ret = usb_lock_device_for_reset(udev, sdev->interface);
+	if (ret < 0) {
+		dev_err(&udev->dev, "lock for reset\n");
+		spin_lock_irq(&ud->lock);
+		ud->status = SDEV_ST_ERROR;
+		spin_unlock_irq(&ud->lock);
+		return;
+	}
+
+	/* try to reset the device */
+	ret = usb_reset_device(udev);
+	usb_unlock_device(udev);
+
+	spin_lock_irq(&ud->lock);
+	if (ret) {
+		dev_err(&udev->dev, "device reset\n");
+		ud->status = SDEV_ST_ERROR;
+	} else {
+		dev_info(&udev->dev, "device reset\n");
+		ud->status = SDEV_ST_AVAILABLE;
+	}
+	spin_unlock_irq(&ud->lock);
+}
+
+static void stub_device_unusable(struct usbip_device *ud)
+{
+	spin_lock_irq(&ud->lock);
+	ud->status = SDEV_ST_ERROR;
+	spin_unlock_irq(&ud->lock);
+}
+
+/**
+ * stub_device_alloc - allocate a new stub_device struct
+ * @interface: usb_interface of a new device
+ *
+ * Allocates and initializes a new stub_device struct.
+ */
+static struct stub_device *stub_device_alloc(struct usb_device *udev)
+{
+	struct stub_device *sdev;
+	int busnum = udev->bus->busnum;
+	int devnum = udev->devnum;
+
+	dev_dbg(&udev->dev, "allocating stub device");
+
+	/* yes, it's a new device */
+	sdev = kzalloc(sizeof(struct stub_device), GFP_KERNEL);
+	if (!sdev)
+		return NULL;
+
+	sdev->udev = usb_get_dev(udev);
+
+	/*
+	 * devid is defined with devnum when this driver is first allocated.
+	 * devnum may change later if a device is reset. However, devid never
+	 * changes during a usbip connection.
+	 */
+	sdev->devid		= (busnum << 16) | devnum;
+	sdev->ud.side		= USBIP_STUB;
+	sdev->ud.status		= SDEV_ST_AVAILABLE;
+	spin_lock_init(&sdev->ud.lock);
+	sdev->ud.tcp_socket	= NULL;
+
+	INIT_LIST_HEAD(&sdev->priv_init);
+	INIT_LIST_HEAD(&sdev->priv_tx);
+	INIT_LIST_HEAD(&sdev->priv_free);
+	INIT_LIST_HEAD(&sdev->unlink_free);
+	INIT_LIST_HEAD(&sdev->unlink_tx);
+	spin_lock_init(&sdev->priv_lock);
+
+	init_waitqueue_head(&sdev->tx_waitq);
+
+	sdev->ud.eh_ops.shutdown = stub_shutdown_connection;
+	sdev->ud.eh_ops.reset    = stub_device_reset;
+	sdev->ud.eh_ops.unusable = stub_device_unusable;
+
+	usbip_start_eh(&sdev->ud);
+
+	dev_dbg(&udev->dev, "register new device\n");
+
+	return sdev;
+}
+
+static void stub_device_free(struct stub_device *sdev)
+{
+	kfree(sdev);
+}
+
+static int stub_probe(struct usb_device *udev)
+{
+	struct stub_device *sdev = NULL;
+	const char *udev_busid = dev_name(&udev->dev);
+	int err = 0;
+	struct bus_id_priv *busid_priv;
+	int rc;
+
+	dev_dbg(&udev->dev, "Enter\n");
+
+	/* check we should claim or not by busid_table */
+	busid_priv = get_busid_priv(udev_busid);
+	if (!busid_priv || (busid_priv->status == STUB_BUSID_REMOV) ||
+	    (busid_priv->status == STUB_BUSID_OTHER)) {
+		dev_info(&udev->dev,
+			"%s is not in match_busid table... skip!\n",
+			udev_busid);
+
+		/*
+		 * Return value should be ENODEV or ENOXIO to continue trying
+		 * other matched drivers by the driver core.
+		 * See driver_probe_device() in driver/base/dd.c
+		 */
+		return -ENODEV;
+	}
+
+	if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) {
+		dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n",
+			 udev_busid);
+		return -ENODEV;
+	}
+
+	if (!strcmp(udev->bus->bus_name, "vhci_hcd")) {
+		dev_dbg(&udev->dev,
+			"%s is attached on vhci_hcd... skip!\n",
+			udev_busid);
+
+		return -ENODEV;
+	}
+
+	/* ok, this is my device */
+	sdev = stub_device_alloc(udev);
+	if (!sdev)
+		return -ENOMEM;
+
+	dev_info(&udev->dev,
+		"usbip-host: register new device (bus %u dev %u)\n",
+		udev->bus->busnum, udev->devnum);
+
+	busid_priv->shutdown_busid = 0;
+
+	/* set private data to usb_device */
+	dev_set_drvdata(&udev->dev, sdev);
+	busid_priv->sdev = sdev;
+	busid_priv->udev = udev;
+
+	/*
+	 * Claim this hub port.
+	 * It doesn't matter what value we pass as owner
+	 * (struct dev_state) as long as it is unique.
+	 */
+	rc = usb_hub_claim_port(udev->parent, udev->portnum,
+			(struct usb_dev_state *) udev);
+	if (rc) {
+		dev_dbg(&udev->dev, "unable to claim port\n");
+		return rc;
+	}
+
+	err = stub_add_files(&udev->dev);
+	if (err) {
+		dev_err(&udev->dev, "stub_add_files for %s\n", udev_busid);
+		dev_set_drvdata(&udev->dev, NULL);
+		usb_put_dev(udev);
+		kthread_stop_put(sdev->ud.eh);
+
+		busid_priv->sdev = NULL;
+		stub_device_free(sdev);
+		return err;
+	}
+	busid_priv->status = STUB_BUSID_ALLOC;
+
+	return 0;
+}
+
+static void shutdown_busid(struct bus_id_priv *busid_priv)
+{
+	if (busid_priv->sdev && !busid_priv->shutdown_busid) {
+		busid_priv->shutdown_busid = 1;
+		usbip_event_add(&busid_priv->sdev->ud, SDEV_EVENT_REMOVED);
+
+		/* wait for the stop of the event handler */
+		usbip_stop_eh(&busid_priv->sdev->ud);
+	}
+}
+
+/*
+ * called in usb_disconnect() or usb_deregister()
+ * but only if actconfig(active configuration) exists
+ */
+static void stub_disconnect(struct usb_device *udev)
+{
+	struct stub_device *sdev;
+	const char *udev_busid = dev_name(&udev->dev);
+	struct bus_id_priv *busid_priv;
+	int rc;
+
+	dev_dbg(&udev->dev, "Enter\n");
+
+	busid_priv = get_busid_priv(udev_busid);
+	if (!busid_priv) {
+		BUG();
+		return;
+	}
+
+	sdev = dev_get_drvdata(&udev->dev);
+
+	/* get stub_device */
+	if (!sdev) {
+		dev_err(&udev->dev, "could not get device");
+		return;
+	}
+
+	dev_set_drvdata(&udev->dev, NULL);
+
+	/*
+	 * NOTE: rx/tx threads are invoked for each usb_device.
+	 */
+	stub_remove_files(&udev->dev);
+
+	/* release port */
+	rc = usb_hub_release_port(udev->parent, udev->portnum,
+				  (struct usb_dev_state *) udev);
+	if (rc) {
+		dev_dbg(&udev->dev, "unable to release port\n");
+		return;
+	}
+
+	/* If usb reset is called from event handler */
+	if (busid_priv->sdev->ud.eh == current)
+		return;
+
+	/* shutdown the current connection */
+	shutdown_busid(busid_priv);
+
+	usb_put_dev(sdev->udev);
+
+	/* free sdev */
+	busid_priv->sdev = NULL;
+	stub_device_free(sdev);
+
+	if (busid_priv->status == STUB_BUSID_ALLOC) {
+		busid_priv->status = STUB_BUSID_ADDED;
+	} else {
+		busid_priv->status = STUB_BUSID_OTHER;
+		del_match_busid((char *)udev_busid);
+	}
+}
+
+#ifdef CONFIG_PM
+
+/* These functions need usb_port_suspend and usb_port_resume,
+ * which reside in drivers/usb/core/usb.h. Skip for now. */
+
+static int stub_suspend(struct usb_device *udev, pm_message_t message)
+{
+	dev_dbg(&udev->dev, "stub_suspend\n");
+
+	return 0;
+}
+
+static int stub_resume(struct usb_device *udev, pm_message_t message)
+{
+	dev_dbg(&udev->dev, "stub_resume\n");
+
+	return 0;
+}
+
+#endif	/* CONFIG_PM */
+
+struct usb_device_driver stub_driver = {
+	.name		= "usbip-host",
+	.probe		= stub_probe,
+	.disconnect	= stub_disconnect,
+#ifdef CONFIG_PM
+	.suspend	= stub_suspend,
+	.resume		= stub_resume,
+#endif
+	.supports_autosuspend	=	0,
+};
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
new file mode 100644
index 000000000000..44ab43fc4fcc
--- /dev/null
+++ b/drivers/usb/usbip/stub_main.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/device.h>
+
+#include "usbip_common.h"
+#include "stub.h"
+
+#define DRIVER_AUTHOR "Takahiro Hirofuchi"
+#define DRIVER_DESC "USB/IP Host Driver"
+
+struct kmem_cache *stub_priv_cache;
+/*
+ * busid_tables defines matching busids that usbip can grab. A user can change
+ * dynamically what device is locally used and what device is exported to a
+ * remote host.
+ */
+#define MAX_BUSID 16
+static struct bus_id_priv busid_table[MAX_BUSID];
+static spinlock_t busid_table_lock;
+
+static void init_busid_table(void)
+{
+	/*
+	 * This also sets the bus_table[i].status to
+	 * STUB_BUSID_OTHER, which is 0.
+	 */
+	memset(busid_table, 0, sizeof(busid_table));
+
+	spin_lock_init(&busid_table_lock);
+}
+
+/*
+ * Find the index of the busid by name.
+ * Must be called with busid_table_lock held.
+ */
+static int get_busid_idx(const char *busid)
+{
+	int i;
+	int idx = -1;
+
+	for (i = 0; i < MAX_BUSID; i++)
+		if (busid_table[i].name[0])
+			if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) {
+				idx = i;
+				break;
+			}
+	return idx;
+}
+
+struct bus_id_priv *get_busid_priv(const char *busid)
+{
+	int idx;
+	struct bus_id_priv *bid = NULL;
+
+	spin_lock(&busid_table_lock);
+	idx = get_busid_idx(busid);
+	if (idx >= 0)
+		bid = &(busid_table[idx]);
+	spin_unlock(&busid_table_lock);
+
+	return bid;
+}
+
+static int add_match_busid(char *busid)
+{
+	int i;
+	int ret = -1;
+
+	spin_lock(&busid_table_lock);
+	/* already registered? */
+	if (get_busid_idx(busid) >= 0) {
+		ret = 0;
+		goto out;
+	}
+
+	for (i = 0; i < MAX_BUSID; i++)
+		if (!busid_table[i].name[0]) {
+			strlcpy(busid_table[i].name, busid, BUSID_SIZE);
+			if ((busid_table[i].status != STUB_BUSID_ALLOC) &&
+			    (busid_table[i].status != STUB_BUSID_REMOV))
+				busid_table[i].status = STUB_BUSID_ADDED;
+			ret = 0;
+			break;
+		}
+
+out:
+	spin_unlock(&busid_table_lock);
+
+	return ret;
+}
+
+int del_match_busid(char *busid)
+{
+	int idx;
+	int ret = -1;
+
+	spin_lock(&busid_table_lock);
+	idx = get_busid_idx(busid);
+	if (idx < 0)
+		goto out;
+
+	/* found */
+	ret = 0;
+
+	if (busid_table[idx].status == STUB_BUSID_OTHER)
+		memset(busid_table[idx].name, 0, BUSID_SIZE);
+
+	if ((busid_table[idx].status != STUB_BUSID_OTHER) &&
+	    (busid_table[idx].status != STUB_BUSID_ADDED))
+		busid_table[idx].status = STUB_BUSID_REMOV;
+
+out:
+	spin_unlock(&busid_table_lock);
+
+	return ret;
+}
+
+static ssize_t show_match_busid(struct device_driver *drv, char *buf)
+{
+	int i;
+	char *out = buf;
+
+	spin_lock(&busid_table_lock);
+	for (i = 0; i < MAX_BUSID; i++)
+		if (busid_table[i].name[0])
+			out += sprintf(out, "%s ", busid_table[i].name);
+	spin_unlock(&busid_table_lock);
+	out += sprintf(out, "\n");
+
+	return out - buf;
+}
+
+static ssize_t store_match_busid(struct device_driver *dev, const char *buf,
+				 size_t count)
+{
+	int len;
+	char busid[BUSID_SIZE];
+
+	if (count < 5)
+		return -EINVAL;
+
+	/* busid needs to include \0 termination */
+	len = strlcpy(busid, buf + 4, BUSID_SIZE);
+	if (sizeof(busid) <= len)
+		return -EINVAL;
+
+	if (!strncmp(buf, "add ", 4)) {
+		if (add_match_busid(busid) < 0)
+			return -ENOMEM;
+
+		pr_debug("add busid %s\n", busid);
+		return count;
+	}
+
+	if (!strncmp(buf, "del ", 4)) {
+		if (del_match_busid(busid) < 0)
+			return -ENODEV;
+
+		pr_debug("del busid %s\n", busid);
+		return count;
+	}
+
+	return -EINVAL;
+}
+static DRIVER_ATTR(match_busid, S_IRUSR | S_IWUSR, show_match_busid,
+		   store_match_busid);
+
+static ssize_t rebind_store(struct device_driver *dev, const char *buf,
+				 size_t count)
+{
+	int ret;
+	int len;
+	struct bus_id_priv *bid;
+
+	/* buf length should be less that BUSID_SIZE */
+	len = strnlen(buf, BUSID_SIZE);
+
+	if (!(len < BUSID_SIZE))
+		return -EINVAL;
+
+	bid = get_busid_priv(buf);
+	if (!bid)
+		return -ENODEV;
+
+	ret = device_attach(&bid->udev->dev);
+	if (ret < 0) {
+		dev_err(&bid->udev->dev, "rebind failed\n");
+		return ret;
+	}
+
+	return count;
+}
+
+static DRIVER_ATTR_WO(rebind);
+
+static struct stub_priv *stub_priv_pop_from_listhead(struct list_head *listhead)
+{
+	struct stub_priv *priv, *tmp;
+
+	list_for_each_entry_safe(priv, tmp, listhead, list) {
+		list_del(&priv->list);
+		return priv;
+	}
+
+	return NULL;
+}
+
+static struct stub_priv *stub_priv_pop(struct stub_device *sdev)
+{
+	unsigned long flags;
+	struct stub_priv *priv;
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	priv = stub_priv_pop_from_listhead(&sdev->priv_init);
+	if (priv)
+		goto done;
+
+	priv = stub_priv_pop_from_listhead(&sdev->priv_tx);
+	if (priv)
+		goto done;
+
+	priv = stub_priv_pop_from_listhead(&sdev->priv_free);
+
+done:
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return priv;
+}
+
+void stub_device_cleanup_urbs(struct stub_device *sdev)
+{
+	struct stub_priv *priv;
+	struct urb *urb;
+
+	dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+
+	while ((priv = stub_priv_pop(sdev))) {
+		urb = priv->urb;
+		dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+		usb_kill_urb(urb);
+
+		kmem_cache_free(stub_priv_cache, priv);
+
+		kfree(urb->transfer_buffer);
+		kfree(urb->setup_packet);
+		usb_free_urb(urb);
+	}
+}
+
+static int __init usbip_host_init(void)
+{
+	int ret;
+
+	init_busid_table();
+
+	stub_priv_cache = KMEM_CACHE(stub_priv, SLAB_HWCACHE_ALIGN);
+	if (!stub_priv_cache) {
+		pr_err("kmem_cache_create failed\n");
+		return -ENOMEM;
+	}
+
+	ret = usb_register_device_driver(&stub_driver, THIS_MODULE);
+	if (ret) {
+		pr_err("usb_register failed %d\n", ret);
+		goto err_usb_register;
+	}
+
+	ret = driver_create_file(&stub_driver.drvwrap.driver,
+				 &driver_attr_match_busid);
+	if (ret) {
+		pr_err("driver_create_file failed\n");
+		goto err_create_file;
+	}
+
+	ret = driver_create_file(&stub_driver.drvwrap.driver,
+				 &driver_attr_rebind);
+	if (ret) {
+		pr_err("driver_create_file failed\n");
+		goto err_create_file;
+	}
+
+	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
+	return ret;
+
+err_create_file:
+	usb_deregister_device_driver(&stub_driver);
+err_usb_register:
+	kmem_cache_destroy(stub_priv_cache);
+	return ret;
+}
+
+static void __exit usbip_host_exit(void)
+{
+	driver_remove_file(&stub_driver.drvwrap.driver,
+			   &driver_attr_match_busid);
+
+	driver_remove_file(&stub_driver.drvwrap.driver,
+			   &driver_attr_rebind);
+
+	/*
+	 * deregister() calls stub_disconnect() for all devices. Device
+	 * specific data is cleared in stub_disconnect().
+	 */
+	usb_deregister_device_driver(&stub_driver);
+
+	kmem_cache_destroy(stub_priv_cache);
+}
+
+module_init(usbip_host_init);
+module_exit(usbip_host_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
new file mode 100644
index 000000000000..00e475c51a12
--- /dev/null
+++ b/drivers/usb/usbip/stub_rx.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <asm/byteorder.h>
+#include <linux/kthread.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+
+#include "usbip_common.h"
+#include "stub.h"
+
+static int is_clear_halt_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+
+	 return (req->bRequest == USB_REQ_CLEAR_FEATURE) &&
+		 (req->bRequestType == USB_RECIP_ENDPOINT) &&
+		 (req->wValue == USB_ENDPOINT_HALT);
+}
+
+static int is_set_interface_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+
+	return (req->bRequest == USB_REQ_SET_INTERFACE) &&
+		(req->bRequestType == USB_RECIP_INTERFACE);
+}
+
+static int is_set_configuration_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+
+	return (req->bRequest == USB_REQ_SET_CONFIGURATION) &&
+		(req->bRequestType == USB_RECIP_DEVICE);
+}
+
+static int is_reset_device_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+	__u16 value;
+	__u16 index;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+	value = le16_to_cpu(req->wValue);
+	index = le16_to_cpu(req->wIndex);
+
+	if ((req->bRequest == USB_REQ_SET_FEATURE) &&
+	    (req->bRequestType == USB_RT_PORT) &&
+	    (value == USB_PORT_FEAT_RESET)) {
+		usbip_dbg_stub_rx("reset_device_cmd, port %u\n", index);
+		return 1;
+	} else
+		return 0;
+}
+
+static int tweak_clear_halt_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+	int target_endp;
+	int target_dir;
+	int target_pipe;
+	int ret;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+
+	/*
+	 * The stalled endpoint is specified in the wIndex value. The endpoint
+	 * of the urb is the target of this clear_halt request (i.e., control
+	 * endpoint).
+	 */
+	target_endp = le16_to_cpu(req->wIndex) & 0x000f;
+
+	/* the stalled endpoint direction is IN or OUT?. USB_DIR_IN is 0x80.  */
+	target_dir = le16_to_cpu(req->wIndex) & 0x0080;
+
+	if (target_dir)
+		target_pipe = usb_rcvctrlpipe(urb->dev, target_endp);
+	else
+		target_pipe = usb_sndctrlpipe(urb->dev, target_endp);
+
+	ret = usb_clear_halt(urb->dev, target_pipe);
+	if (ret < 0)
+		dev_err(&urb->dev->dev,
+			"usb_clear_halt error: devnum %d endp %d ret %d\n",
+			urb->dev->devnum, target_endp, ret);
+	else
+		dev_info(&urb->dev->dev,
+			 "usb_clear_halt done: devnum %d endp %d\n",
+			 urb->dev->devnum, target_endp);
+
+	return ret;
+}
+
+static int tweak_set_interface_cmd(struct urb *urb)
+{
+	struct usb_ctrlrequest *req;
+	__u16 alternate;
+	__u16 interface;
+	int ret;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+	alternate = le16_to_cpu(req->wValue);
+	interface = le16_to_cpu(req->wIndex);
+
+	usbip_dbg_stub_rx("set_interface: inf %u alt %u\n",
+			  interface, alternate);
+
+	ret = usb_set_interface(urb->dev, interface, alternate);
+	if (ret < 0)
+		dev_err(&urb->dev->dev,
+			"usb_set_interface error: inf %u alt %u ret %d\n",
+			interface, alternate, ret);
+	else
+		dev_info(&urb->dev->dev,
+			"usb_set_interface done: inf %u alt %u\n",
+			interface, alternate);
+
+	return ret;
+}
+
+static int tweak_set_configuration_cmd(struct urb *urb)
+{
+	struct stub_priv *priv = (struct stub_priv *) urb->context;
+	struct stub_device *sdev = priv->sdev;
+	struct usb_ctrlrequest *req;
+	__u16 config;
+	int err;
+
+	req = (struct usb_ctrlrequest *) urb->setup_packet;
+	config = le16_to_cpu(req->wValue);
+
+	err = usb_set_configuration(sdev->udev, config);
+	if (err && err != -ENODEV)
+		dev_err(&sdev->udev->dev, "can't set config #%d, error %d\n",
+			config, err);
+	return 0;
+}
+
+static int tweak_reset_device_cmd(struct urb *urb)
+{
+	struct stub_priv *priv = (struct stub_priv *) urb->context;
+	struct stub_device *sdev = priv->sdev;
+
+	dev_info(&urb->dev->dev, "usb_queue_reset_device\n");
+
+	/*
+	 * With the implementation of pre_reset and post_reset the driver no
+	 * longer unbinds. This allows the use of synchronous reset.
+	 */
+
+	if (usb_lock_device_for_reset(sdev->udev, sdev->interface) < 0) {
+		dev_err(&urb->dev->dev, "could not obtain lock to reset device\n");
+		return 0;
+	}
+	usb_reset_device(sdev->udev);
+	usb_unlock_device(sdev->udev);
+
+	return 0;
+}
+
+/*
+ * clear_halt, set_interface, and set_configuration require special tricks.
+ */
+static void tweak_special_requests(struct urb *urb)
+{
+	if (!urb || !urb->setup_packet)
+		return;
+
+	if (usb_pipetype(urb->pipe) != PIPE_CONTROL)
+		return;
+
+	if (is_clear_halt_cmd(urb))
+		/* tweak clear_halt */
+		 tweak_clear_halt_cmd(urb);
+
+	else if (is_set_interface_cmd(urb))
+		/* tweak set_interface */
+		tweak_set_interface_cmd(urb);
+
+	else if (is_set_configuration_cmd(urb))
+		/* tweak set_configuration */
+		tweak_set_configuration_cmd(urb);
+
+	else if (is_reset_device_cmd(urb))
+		tweak_reset_device_cmd(urb);
+	else
+		usbip_dbg_stub_rx("no need to tweak\n");
+}
+
+/*
+ * stub_recv_unlink() unlinks the URB by a call to usb_unlink_urb().
+ * By unlinking the urb asynchronously, stub_rx can continuously
+ * process coming urbs.  Even if the urb is unlinked, its completion
+ * handler will be called and stub_tx will send a return pdu.
+ *
+ * See also comments about unlinking strategy in vhci_hcd.c.
+ */
+static int stub_recv_cmd_unlink(struct stub_device *sdev,
+				struct usbip_header *pdu)
+{
+	int ret;
+	unsigned long flags;
+	struct stub_priv *priv;
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	list_for_each_entry(priv, &sdev->priv_init, list) {
+		if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
+			continue;
+
+		dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
+			 priv->urb);
+
+		/*
+		 * This matched urb is not completed yet (i.e., be in
+		 * flight in usb hcd hardware/driver). Now we are
+		 * cancelling it. The unlinking flag means that we are
+		 * now not going to return the normal result pdu of a
+		 * submission request, but going to return a result pdu
+		 * of the unlink request.
+		 */
+		priv->unlinking = 1;
+
+		/*
+		 * In the case that unlinking flag is on, prev->seqnum
+		 * is changed from the seqnum of the cancelling urb to
+		 * the seqnum of the unlink request. This will be used
+		 * to make the result pdu of the unlink request.
+		 */
+		priv->seqnum = pdu->base.seqnum;
+
+		spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+		/*
+		 * usb_unlink_urb() is now out of spinlocking to avoid
+		 * spinlock recursion since stub_complete() is
+		 * sometimes called in this context but not in the
+		 * interrupt context.  If stub_complete() is executed
+		 * before we call usb_unlink_urb(), usb_unlink_urb()
+		 * will return an error value. In this case, stub_tx
+		 * will return the result pdu of this unlink request
+		 * though submission is completed and actual unlinking
+		 * is not executed. OK?
+		 */
+		/* In the above case, urb->status is not -ECONNRESET,
+		 * so a driver in a client host will know the failure
+		 * of the unlink request ?
+		 */
+		ret = usb_unlink_urb(priv->urb);
+		if (ret != -EINPROGRESS)
+			dev_err(&priv->urb->dev->dev,
+				"failed to unlink a urb %p, ret %d\n",
+				priv->urb, ret);
+
+		return 0;
+	}
+
+	usbip_dbg_stub_rx("seqnum %d is not pending\n",
+			  pdu->u.cmd_unlink.seqnum);
+
+	/*
+	 * The urb of the unlink target is not found in priv_init queue. It was
+	 * already completed and its results is/was going to be sent by a
+	 * CMD_RET pdu. In this case, usb_unlink_urb() is not needed. We only
+	 * return the completeness of this unlink request to vhci_hcd.
+	 */
+	stub_enqueue_ret_unlink(sdev, pdu->base.seqnum, 0);
+
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return 0;
+}
+
+static int valid_request(struct stub_device *sdev, struct usbip_header *pdu)
+{
+	struct usbip_device *ud = &sdev->ud;
+	int valid = 0;
+
+	if (pdu->base.devid == sdev->devid) {
+		spin_lock_irq(&ud->lock);
+		if (ud->status == SDEV_ST_USED) {
+			/* A request is valid. */
+			valid = 1;
+		}
+		spin_unlock_irq(&ud->lock);
+	}
+
+	return valid;
+}
+
+static struct stub_priv *stub_priv_alloc(struct stub_device *sdev,
+					 struct usbip_header *pdu)
+{
+	struct stub_priv *priv;
+	struct usbip_device *ud = &sdev->ud;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	priv = kmem_cache_zalloc(stub_priv_cache, GFP_ATOMIC);
+	if (!priv) {
+		dev_err(&sdev->interface->dev, "alloc stub_priv\n");
+		spin_unlock_irqrestore(&sdev->priv_lock, flags);
+		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
+		return NULL;
+	}
+
+	priv->seqnum = pdu->base.seqnum;
+	priv->sdev = sdev;
+
+	/*
+	 * After a stub_priv is linked to a list_head,
+	 * our error handler can free allocated data.
+	 */
+	list_add_tail(&priv->list, &sdev->priv_init);
+
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return priv;
+}
+
+static int get_pipe(struct stub_device *sdev, int epnum, int dir)
+{
+	struct usb_device *udev = sdev->udev;
+	struct usb_host_endpoint *ep;
+	struct usb_endpoint_descriptor *epd = NULL;
+
+	if (dir == USBIP_DIR_IN)
+		ep = udev->ep_in[epnum & 0x7f];
+	else
+		ep = udev->ep_out[epnum & 0x7f];
+	if (!ep) {
+		dev_err(&sdev->interface->dev, "no such endpoint?, %d\n",
+			epnum);
+		BUG();
+	}
+
+	epd = &ep->desc;
+	if (usb_endpoint_xfer_control(epd)) {
+		if (dir == USBIP_DIR_OUT)
+			return usb_sndctrlpipe(udev, epnum);
+		else
+			return usb_rcvctrlpipe(udev, epnum);
+	}
+
+	if (usb_endpoint_xfer_bulk(epd)) {
+		if (dir == USBIP_DIR_OUT)
+			return usb_sndbulkpipe(udev, epnum);
+		else
+			return usb_rcvbulkpipe(udev, epnum);
+	}
+
+	if (usb_endpoint_xfer_int(epd)) {
+		if (dir == USBIP_DIR_OUT)
+			return usb_sndintpipe(udev, epnum);
+		else
+			return usb_rcvintpipe(udev, epnum);
+	}
+
+	if (usb_endpoint_xfer_isoc(epd)) {
+		if (dir == USBIP_DIR_OUT)
+			return usb_sndisocpipe(udev, epnum);
+		else
+			return usb_rcvisocpipe(udev, epnum);
+	}
+
+	/* NOT REACHED */
+	dev_err(&sdev->interface->dev, "get pipe, epnum %d\n", epnum);
+	return 0;
+}
+
+static void masking_bogus_flags(struct urb *urb)
+{
+	int				xfertype;
+	struct usb_device		*dev;
+	struct usb_host_endpoint	*ep;
+	int				is_out;
+	unsigned int	allowed;
+
+	if (!urb || urb->hcpriv || !urb->complete)
+		return;
+	dev = urb->dev;
+	if ((!dev) || (dev->state < USB_STATE_UNAUTHENTICATED))
+		return;
+
+	ep = (usb_pipein(urb->pipe) ? dev->ep_in : dev->ep_out)
+		[usb_pipeendpoint(urb->pipe)];
+	if (!ep)
+		return;
+
+	xfertype = usb_endpoint_type(&ep->desc);
+	if (xfertype == USB_ENDPOINT_XFER_CONTROL) {
+		struct usb_ctrlrequest *setup =
+			(struct usb_ctrlrequest *) urb->setup_packet;
+
+		if (!setup)
+			return;
+		is_out = !(setup->bRequestType & USB_DIR_IN) ||
+			!setup->wLength;
+	} else {
+		is_out = usb_endpoint_dir_out(&ep->desc);
+	}
+
+	/* enforce simple/standard policy */
+	allowed = (URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT |
+		   URB_DIR_MASK | URB_FREE_BUFFER);
+	switch (xfertype) {
+	case USB_ENDPOINT_XFER_BULK:
+		if (is_out)
+			allowed |= URB_ZERO_PACKET;
+		/* FALLTHROUGH */
+	case USB_ENDPOINT_XFER_CONTROL:
+		allowed |= URB_NO_FSBR;	/* only affects UHCI */
+		/* FALLTHROUGH */
+	default:			/* all non-iso endpoints */
+		if (!is_out)
+			allowed |= URB_SHORT_NOT_OK;
+		break;
+	case USB_ENDPOINT_XFER_ISOC:
+		allowed |= URB_ISO_ASAP;
+		break;
+	}
+	urb->transfer_flags &= allowed;
+}
+
+static void stub_recv_cmd_submit(struct stub_device *sdev,
+				 struct usbip_header *pdu)
+{
+	int ret;
+	struct stub_priv *priv;
+	struct usbip_device *ud = &sdev->ud;
+	struct usb_device *udev = sdev->udev;
+	int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction);
+
+	priv = stub_priv_alloc(sdev, pdu);
+	if (!priv)
+		return;
+
+	/* setup a urb */
+	if (usb_pipeisoc(pipe))
+		priv->urb = usb_alloc_urb(pdu->u.cmd_submit.number_of_packets,
+					  GFP_KERNEL);
+	else
+		priv->urb = usb_alloc_urb(0, GFP_KERNEL);
+
+	if (!priv->urb) {
+		dev_err(&sdev->interface->dev, "malloc urb\n");
+		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
+		return;
+	}
+
+	/* allocate urb transfer buffer, if needed */
+	if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
+		priv->urb->transfer_buffer =
+			kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
+				GFP_KERNEL);
+		if (!priv->urb->transfer_buffer) {
+			usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
+			return;
+		}
+	}
+
+	/* copy urb setup packet */
+	priv->urb->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, 8,
+					  GFP_KERNEL);
+	if (!priv->urb->setup_packet) {
+		dev_err(&sdev->interface->dev, "allocate setup_packet\n");
+		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
+		return;
+	}
+
+	/* set other members from the base header of pdu */
+	priv->urb->context                = (void *) priv;
+	priv->urb->dev                    = udev;
+	priv->urb->pipe                   = pipe;
+	priv->urb->complete               = stub_complete;
+
+	usbip_pack_pdu(pdu, priv->urb, USBIP_CMD_SUBMIT, 0);
+
+
+	if (usbip_recv_xbuff(ud, priv->urb) < 0)
+		return;
+
+	if (usbip_recv_iso(ud, priv->urb) < 0)
+		return;
+
+	/* no need to submit an intercepted request, but harmless? */
+	tweak_special_requests(priv->urb);
+
+	masking_bogus_flags(priv->urb);
+	/* urb is now ready to submit */
+	ret = usb_submit_urb(priv->urb, GFP_KERNEL);
+
+	if (ret == 0)
+		usbip_dbg_stub_rx("submit urb ok, seqnum %u\n",
+				  pdu->base.seqnum);
+	else {
+		dev_err(&sdev->interface->dev, "submit_urb error, %d\n", ret);
+		usbip_dump_header(pdu);
+		usbip_dump_urb(priv->urb);
+
+		/*
+		 * Pessimistic.
+		 * This connection will be discarded.
+		 */
+		usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT);
+	}
+
+	usbip_dbg_stub_rx("Leave\n");
+}
+
+/* recv a pdu */
+static void stub_rx_pdu(struct usbip_device *ud)
+{
+	int ret;
+	struct usbip_header pdu;
+	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
+	struct device *dev = &sdev->udev->dev;
+
+	usbip_dbg_stub_rx("Enter\n");
+
+	memset(&pdu, 0, sizeof(pdu));
+
+	/* receive a pdu header */
+	ret = usbip_recv(ud->tcp_socket, &pdu, sizeof(pdu));
+	if (ret != sizeof(pdu)) {
+		dev_err(dev, "recv a header, %d\n", ret);
+		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		return;
+	}
+
+	usbip_header_correct_endian(&pdu, 0);
+
+	if (usbip_dbg_flag_stub_rx)
+		usbip_dump_header(&pdu);
+
+	if (!valid_request(sdev, &pdu)) {
+		dev_err(dev, "recv invalid request\n");
+		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		return;
+	}
+
+	switch (pdu.base.command) {
+	case USBIP_CMD_UNLINK:
+		stub_recv_cmd_unlink(sdev, &pdu);
+		break;
+
+	case USBIP_CMD_SUBMIT:
+		stub_recv_cmd_submit(sdev, &pdu);
+		break;
+
+	default:
+		/* NOTREACHED */
+		dev_err(dev, "unknown pdu\n");
+		usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		break;
+	}
+}
+
+int stub_rx_loop(void *data)
+{
+	struct usbip_device *ud = data;
+
+	while (!kthread_should_stop()) {
+		if (usbip_event_happened(ud))
+			break;
+
+		stub_rx_pdu(ud);
+	}
+
+	return 0;
+}
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
new file mode 100644
index 000000000000..dbcabc9dbe0d
--- /dev/null
+++ b/drivers/usb/usbip/stub_tx.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/socket.h>
+
+#include "usbip_common.h"
+#include "stub.h"
+
+static void stub_free_priv_and_urb(struct stub_priv *priv)
+{
+	struct urb *urb = priv->urb;
+
+	kfree(urb->setup_packet);
+	kfree(urb->transfer_buffer);
+	list_del(&priv->list);
+	kmem_cache_free(stub_priv_cache, priv);
+	usb_free_urb(urb);
+}
+
+/* be in spin_lock_irqsave(&sdev->priv_lock, flags) */
+void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum,
+			     __u32 status)
+{
+	struct stub_unlink *unlink;
+
+	unlink = kzalloc(sizeof(struct stub_unlink), GFP_ATOMIC);
+	if (!unlink) {
+		usbip_event_add(&sdev->ud, VDEV_EVENT_ERROR_MALLOC);
+		return;
+	}
+
+	unlink->seqnum = seqnum;
+	unlink->status = status;
+
+	list_add_tail(&unlink->list, &sdev->unlink_tx);
+}
+
+/**
+ * stub_complete - completion handler of a usbip urb
+ * @urb: pointer to the urb completed
+ *
+ * When a urb has completed, the USB core driver calls this function mostly in
+ * the interrupt context. To return the result of a urb, the completed urb is
+ * linked to the pending list of returning.
+ *
+ */
+void stub_complete(struct urb *urb)
+{
+	struct stub_priv *priv = (struct stub_priv *) urb->context;
+	struct stub_device *sdev = priv->sdev;
+	unsigned long flags;
+
+	usbip_dbg_stub_tx("complete! status %d\n", urb->status);
+
+	switch (urb->status) {
+	case 0:
+		/* OK */
+		break;
+	case -ENOENT:
+		dev_info(&urb->dev->dev,
+			 "stopped by a call to usb_kill_urb() because of cleaning up a virtual connection\n");
+		return;
+	case -ECONNRESET:
+		dev_info(&urb->dev->dev,
+			 "unlinked by a call to usb_unlink_urb()\n");
+		break;
+	case -EPIPE:
+		dev_info(&urb->dev->dev, "endpoint %d is stalled\n",
+			 usb_pipeendpoint(urb->pipe));
+		break;
+	case -ESHUTDOWN:
+		dev_info(&urb->dev->dev, "device removed?\n");
+		break;
+	default:
+		dev_info(&urb->dev->dev,
+			 "urb completion with non-zero status %d\n",
+			 urb->status);
+		break;
+	}
+
+	/* link a urb to the queue of tx. */
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+	if (priv->unlinking) {
+		stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
+		stub_free_priv_and_urb(priv);
+	} else {
+		list_move_tail(&priv->list, &sdev->priv_tx);
+	}
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	/* wake up tx_thread */
+	wake_up(&sdev->tx_waitq);
+}
+
+static inline void setup_base_pdu(struct usbip_header_basic *base,
+				  __u32 command, __u32 seqnum)
+{
+	base->command	= command;
+	base->seqnum	= seqnum;
+	base->devid	= 0;
+	base->ep	= 0;
+	base->direction = 0;
+}
+
+static void setup_ret_submit_pdu(struct usbip_header *rpdu, struct urb *urb)
+{
+	struct stub_priv *priv = (struct stub_priv *) urb->context;
+
+	setup_base_pdu(&rpdu->base, USBIP_RET_SUBMIT, priv->seqnum);
+	usbip_pack_pdu(rpdu, urb, USBIP_RET_SUBMIT, 1);
+}
+
+static void setup_ret_unlink_pdu(struct usbip_header *rpdu,
+				 struct stub_unlink *unlink)
+{
+	setup_base_pdu(&rpdu->base, USBIP_RET_UNLINK, unlink->seqnum);
+	rpdu->u.ret_unlink.status = unlink->status;
+}
+
+static struct stub_priv *dequeue_from_priv_tx(struct stub_device *sdev)
+{
+	unsigned long flags;
+	struct stub_priv *priv, *tmp;
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	list_for_each_entry_safe(priv, tmp, &sdev->priv_tx, list) {
+		list_move_tail(&priv->list, &sdev->priv_free);
+		spin_unlock_irqrestore(&sdev->priv_lock, flags);
+		return priv;
+	}
+
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return NULL;
+}
+
+static int stub_send_ret_submit(struct stub_device *sdev)
+{
+	unsigned long flags;
+	struct stub_priv *priv, *tmp;
+
+	struct msghdr msg;
+	size_t txsize;
+
+	size_t total_size = 0;
+
+	while ((priv = dequeue_from_priv_tx(sdev)) != NULL) {
+		int ret;
+		struct urb *urb = priv->urb;
+		struct usbip_header pdu_header;
+		struct usbip_iso_packet_descriptor *iso_buffer = NULL;
+		struct kvec *iov = NULL;
+		int iovnum = 0;
+
+		txsize = 0;
+		memset(&pdu_header, 0, sizeof(pdu_header));
+		memset(&msg, 0, sizeof(msg));
+
+		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
+			iovnum = 2 + urb->number_of_packets;
+		else
+			iovnum = 2;
+
+		iov = kcalloc(iovnum, sizeof(struct kvec), GFP_KERNEL);
+
+		if (!iov) {
+			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_MALLOC);
+			return -1;
+		}
+
+		iovnum = 0;
+
+		/* 1. setup usbip_header */
+		setup_ret_submit_pdu(&pdu_header, urb);
+		usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
+				  pdu_header.base.seqnum, urb);
+		usbip_header_correct_endian(&pdu_header, 1);
+
+		iov[iovnum].iov_base = &pdu_header;
+		iov[iovnum].iov_len  = sizeof(pdu_header);
+		iovnum++;
+		txsize += sizeof(pdu_header);
+
+		/* 2. setup transfer buffer */
+		if (usb_pipein(urb->pipe) &&
+		    usb_pipetype(urb->pipe) != PIPE_ISOCHRONOUS &&
+		    urb->actual_length > 0) {
+			iov[iovnum].iov_base = urb->transfer_buffer;
+			iov[iovnum].iov_len  = urb->actual_length;
+			iovnum++;
+			txsize += urb->actual_length;
+		} else if (usb_pipein(urb->pipe) &&
+			   usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+			/*
+			 * For isochronous packets: actual length is the sum of
+			 * the actual length of the individual, packets, but as
+			 * the packet offsets are not changed there will be
+			 * padding between the packets. To optimally use the
+			 * bandwidth the padding is not transmitted.
+			 */
+
+			int i;
+
+			for (i = 0; i < urb->number_of_packets; i++) {
+				iov[iovnum].iov_base = urb->transfer_buffer +
+					urb->iso_frame_desc[i].offset;
+				iov[iovnum].iov_len =
+					urb->iso_frame_desc[i].actual_length;
+				iovnum++;
+				txsize += urb->iso_frame_desc[i].actual_length;
+			}
+
+			if (txsize != sizeof(pdu_header) + urb->actual_length) {
+				dev_err(&sdev->interface->dev,
+					"actual length of urb %d does not match iso packet sizes %zu\n",
+					urb->actual_length,
+					txsize-sizeof(pdu_header));
+				kfree(iov);
+				usbip_event_add(&sdev->ud,
+						SDEV_EVENT_ERROR_TCP);
+			   return -1;
+			}
+		}
+
+		/* 3. setup iso_packet_descriptor */
+		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+			ssize_t len = 0;
+
+			iso_buffer = usbip_alloc_iso_desc_pdu(urb, &len);
+			if (!iso_buffer) {
+				usbip_event_add(&sdev->ud,
+						SDEV_EVENT_ERROR_MALLOC);
+				kfree(iov);
+				return -1;
+			}
+
+			iov[iovnum].iov_base = iso_buffer;
+			iov[iovnum].iov_len  = len;
+			txsize += len;
+			iovnum++;
+		}
+
+		ret = kernel_sendmsg(sdev->ud.tcp_socket, &msg,
+						iov,  iovnum, txsize);
+		if (ret != txsize) {
+			dev_err(&sdev->interface->dev,
+				"sendmsg failed!, retval %d for %zd\n",
+				ret, txsize);
+			kfree(iov);
+			kfree(iso_buffer);
+			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_TCP);
+			return -1;
+		}
+
+		kfree(iov);
+		kfree(iso_buffer);
+
+		total_size += txsize;
+	}
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+	list_for_each_entry_safe(priv, tmp, &sdev->priv_free, list) {
+		stub_free_priv_and_urb(priv);
+	}
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return total_size;
+}
+
+static struct stub_unlink *dequeue_from_unlink_tx(struct stub_device *sdev)
+{
+	unsigned long flags;
+	struct stub_unlink *unlink, *tmp;
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	list_for_each_entry_safe(unlink, tmp, &sdev->unlink_tx, list) {
+		list_move_tail(&unlink->list, &sdev->unlink_free);
+		spin_unlock_irqrestore(&sdev->priv_lock, flags);
+		return unlink;
+	}
+
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return NULL;
+}
+
+static int stub_send_ret_unlink(struct stub_device *sdev)
+{
+	unsigned long flags;
+	struct stub_unlink *unlink, *tmp;
+
+	struct msghdr msg;
+	struct kvec iov[1];
+	size_t txsize;
+
+	size_t total_size = 0;
+
+	while ((unlink = dequeue_from_unlink_tx(sdev)) != NULL) {
+		int ret;
+		struct usbip_header pdu_header;
+
+		txsize = 0;
+		memset(&pdu_header, 0, sizeof(pdu_header));
+		memset(&msg, 0, sizeof(msg));
+		memset(&iov, 0, sizeof(iov));
+
+		usbip_dbg_stub_tx("setup ret unlink %lu\n", unlink->seqnum);
+
+		/* 1. setup usbip_header */
+		setup_ret_unlink_pdu(&pdu_header, unlink);
+		usbip_header_correct_endian(&pdu_header, 1);
+
+		iov[0].iov_base = &pdu_header;
+		iov[0].iov_len  = sizeof(pdu_header);
+		txsize += sizeof(pdu_header);
+
+		ret = kernel_sendmsg(sdev->ud.tcp_socket, &msg, iov,
+				     1, txsize);
+		if (ret != txsize) {
+			dev_err(&sdev->interface->dev,
+				"sendmsg failed!, retval %d for %zd\n",
+				ret, txsize);
+			usbip_event_add(&sdev->ud, SDEV_EVENT_ERROR_TCP);
+			return -1;
+		}
+
+		usbip_dbg_stub_tx("send txdata\n");
+		total_size += txsize;
+	}
+
+	spin_lock_irqsave(&sdev->priv_lock, flags);
+
+	list_for_each_entry_safe(unlink, tmp, &sdev->unlink_free, list) {
+		list_del(&unlink->list);
+		kfree(unlink);
+	}
+
+	spin_unlock_irqrestore(&sdev->priv_lock, flags);
+
+	return total_size;
+}
+
+int stub_tx_loop(void *data)
+{
+	struct usbip_device *ud = data;
+	struct stub_device *sdev = container_of(ud, struct stub_device, ud);
+
+	while (!kthread_should_stop()) {
+		if (usbip_event_happened(ud))
+			break;
+
+		/*
+		 * send_ret_submit comes earlier than send_ret_unlink.  stub_rx
+		 * looks at only priv_init queue. If the completion of a URB is
+		 * earlier than the receive of CMD_UNLINK, priv is moved to
+		 * priv_tx queue and stub_rx does not find the target priv. In
+		 * this case, vhci_rx receives the result of the submit request
+		 * and then receives the result of the unlink request. The
+		 * result of the submit is given back to the usbcore as the
+		 * completion of the unlink request. The request of the
+		 * unlink is ignored. This is ok because a driver who calls
+		 * usb_unlink_urb() understands the unlink was too late by
+		 * getting the status of the given-backed URB which has the
+		 * status of usb_submit_urb().
+		 */
+		if (stub_send_ret_submit(sdev) < 0)
+			break;
+
+		if (stub_send_ret_unlink(sdev) < 0)
+			break;
+
+		wait_event_interruptible(sdev->tx_waitq,
+					 (!list_empty(&sdev->priv_tx) ||
+					  !list_empty(&sdev->unlink_tx) ||
+					  kthread_should_stop()));
+	}
+
+	return 0;
+}
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
new file mode 100644
index 000000000000..facaaf003f19
--- /dev/null
+++ b/drivers/usb/usbip/usbip_common.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <asm/byteorder.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <net/sock.h>
+
+#include "usbip_common.h"
+
+#define DRIVER_AUTHOR "Takahiro Hirofuchi <hirofuchi@users.sourceforge.net>"
+#define DRIVER_DESC "USB/IP Core"
+
+#ifdef CONFIG_USBIP_DEBUG
+unsigned long usbip_debug_flag = 0xffffffff;
+#else
+unsigned long usbip_debug_flag;
+#endif
+EXPORT_SYMBOL_GPL(usbip_debug_flag);
+module_param(usbip_debug_flag, ulong, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(usbip_debug_flag, "debug flags (defined in usbip_common.h)");
+
+/* FIXME */
+struct device_attribute dev_attr_usbip_debug;
+EXPORT_SYMBOL_GPL(dev_attr_usbip_debug);
+
+static ssize_t usbip_debug_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lx\n", usbip_debug_flag);
+}
+
+static ssize_t usbip_debug_store(struct device *dev,
+				 struct device_attribute *attr, const char *buf,
+				 size_t count)
+{
+	if (sscanf(buf, "%lx", &usbip_debug_flag) != 1)
+		return -EINVAL;
+	return count;
+}
+DEVICE_ATTR_RW(usbip_debug);
+
+static void usbip_dump_buffer(char *buff, int bufflen)
+{
+	print_hex_dump(KERN_DEBUG, "usbip-core", DUMP_PREFIX_OFFSET, 16, 4,
+		       buff, bufflen, false);
+}
+
+static void usbip_dump_pipe(unsigned int p)
+{
+	unsigned char type = usb_pipetype(p);
+	unsigned char ep   = usb_pipeendpoint(p);
+	unsigned char dev  = usb_pipedevice(p);
+	unsigned char dir  = usb_pipein(p);
+
+	pr_debug("dev(%d) ep(%d) [%s] ", dev, ep, dir ? "IN" : "OUT");
+
+	switch (type) {
+	case PIPE_ISOCHRONOUS:
+		pr_debug("ISO\n");
+		break;
+	case PIPE_INTERRUPT:
+		pr_debug("INT\n");
+		break;
+	case PIPE_CONTROL:
+		pr_debug("CTRL\n");
+		break;
+	case PIPE_BULK:
+		pr_debug("BULK\n");
+		break;
+	default:
+		pr_debug("ERR\n");
+		break;
+	}
+}
+
+static void usbip_dump_usb_device(struct usb_device *udev)
+{
+	struct device *dev = &udev->dev;
+	int i;
+
+	dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
+		udev->devnum, udev->devpath, usb_speed_string(udev->speed));
+
+	pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+
+	dev_dbg(dev, "                    ");
+	for (i = 0; i < 16; i++)
+		pr_debug(" %2u", i);
+	pr_debug("\n");
+
+	dev_dbg(dev, "       toggle0(IN) :");
+	for (i = 0; i < 16; i++)
+		pr_debug(" %2u", (udev->toggle[0] & (1 << i)) ? 1 : 0);
+	pr_debug("\n");
+
+	dev_dbg(dev, "       toggle1(OUT):");
+	for (i = 0; i < 16; i++)
+		pr_debug(" %2u", (udev->toggle[1] & (1 << i)) ? 1 : 0);
+	pr_debug("\n");
+
+	dev_dbg(dev, "       epmaxp_in   :");
+	for (i = 0; i < 16; i++) {
+		if (udev->ep_in[i])
+			pr_debug(" %2u",
+			    le16_to_cpu(udev->ep_in[i]->desc.wMaxPacketSize));
+	}
+	pr_debug("\n");
+
+	dev_dbg(dev, "       epmaxp_out  :");
+	for (i = 0; i < 16; i++) {
+		if (udev->ep_out[i])
+			pr_debug(" %2u",
+			    le16_to_cpu(udev->ep_out[i]->desc.wMaxPacketSize));
+	}
+	pr_debug("\n");
+
+	dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
+
+	dev_dbg(dev,
+		"descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
+		&udev->descriptor, udev->config,
+		udev->actconfig, udev->rawdescriptors);
+
+	dev_dbg(dev, "have_langid %d, string_langid %d\n",
+		udev->have_langid, udev->string_langid);
+
+	dev_dbg(dev, "maxchild %d\n", udev->maxchild);
+}
+
+static void usbip_dump_request_type(__u8 rt)
+{
+	switch (rt & USB_RECIP_MASK) {
+	case USB_RECIP_DEVICE:
+		pr_debug("DEVICE");
+		break;
+	case USB_RECIP_INTERFACE:
+		pr_debug("INTERF");
+		break;
+	case USB_RECIP_ENDPOINT:
+		pr_debug("ENDPOI");
+		break;
+	case USB_RECIP_OTHER:
+		pr_debug("OTHER ");
+		break;
+	default:
+		pr_debug("------");
+		break;
+	}
+}
+
+static void usbip_dump_usb_ctrlrequest(struct usb_ctrlrequest *cmd)
+{
+	if (!cmd) {
+		pr_debug("       : null pointer\n");
+		return;
+	}
+
+	pr_debug("       ");
+	pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) wLength(%04X) ",
+		 cmd->bRequestType, cmd->bRequest,
+		 cmd->wValue, cmd->wIndex, cmd->wLength);
+	pr_debug("\n       ");
+
+	if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD) {
+		pr_debug("STANDARD ");
+		switch (cmd->bRequest) {
+		case USB_REQ_GET_STATUS:
+			pr_debug("GET_STATUS\n");
+			break;
+		case USB_REQ_CLEAR_FEATURE:
+			pr_debug("CLEAR_FEAT\n");
+			break;
+		case USB_REQ_SET_FEATURE:
+			pr_debug("SET_FEAT\n");
+			break;
+		case USB_REQ_SET_ADDRESS:
+			pr_debug("SET_ADDRRS\n");
+			break;
+		case USB_REQ_GET_DESCRIPTOR:
+			pr_debug("GET_DESCRI\n");
+			break;
+		case USB_REQ_SET_DESCRIPTOR:
+			pr_debug("SET_DESCRI\n");
+			break;
+		case USB_REQ_GET_CONFIGURATION:
+			pr_debug("GET_CONFIG\n");
+			break;
+		case USB_REQ_SET_CONFIGURATION:
+			pr_debug("SET_CONFIG\n");
+			break;
+		case USB_REQ_GET_INTERFACE:
+			pr_debug("GET_INTERF\n");
+			break;
+		case USB_REQ_SET_INTERFACE:
+			pr_debug("SET_INTERF\n");
+			break;
+		case USB_REQ_SYNCH_FRAME:
+			pr_debug("SYNC_FRAME\n");
+			break;
+		default:
+			pr_debug("REQ(%02X)\n", cmd->bRequest);
+			break;
+		}
+		usbip_dump_request_type(cmd->bRequestType);
+	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS) {
+		pr_debug("CLASS\n");
+	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_VENDOR) {
+		pr_debug("VENDOR\n");
+	} else if ((cmd->bRequestType & USB_TYPE_MASK) == USB_TYPE_RESERVED) {
+		pr_debug("RESERVED\n");
+	}
+}
+
+void usbip_dump_urb(struct urb *urb)
+{
+	struct device *dev;
+
+	if (!urb) {
+		pr_debug("urb: null pointer!!\n");
+		return;
+	}
+
+	if (!urb->dev) {
+		pr_debug("urb->dev: null pointer!!\n");
+		return;
+	}
+
+	dev = &urb->dev->dev;
+
+	dev_dbg(dev, "   urb                   :%p\n", urb);
+	dev_dbg(dev, "   dev                   :%p\n", urb->dev);
+
+	usbip_dump_usb_device(urb->dev);
+
+	dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
+
+	usbip_dump_pipe(urb->pipe);
+
+	dev_dbg(dev, "   status                :%d\n", urb->status);
+	dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
+	dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
+	dev_dbg(dev, "   transfer_buffer_length:%d\n",
+						urb->transfer_buffer_length);
+	dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
+	dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
+
+	if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
+		usbip_dump_usb_ctrlrequest(
+			(struct usb_ctrlrequest *)urb->setup_packet);
+
+	dev_dbg(dev, "   start_frame           :%d\n", urb->start_frame);
+	dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
+	dev_dbg(dev, "   interval              :%d\n", urb->interval);
+	dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
+	dev_dbg(dev, "   context               :%p\n", urb->context);
+	dev_dbg(dev, "   complete              :%p\n", urb->complete);
+}
+EXPORT_SYMBOL_GPL(usbip_dump_urb);
+
+void usbip_dump_header(struct usbip_header *pdu)
+{
+	pr_debug("BASE: cmd %u seq %u devid %u dir %u ep %u\n",
+		 pdu->base.command,
+		 pdu->base.seqnum,
+		 pdu->base.devid,
+		 pdu->base.direction,
+		 pdu->base.ep);
+
+	switch (pdu->base.command) {
+	case USBIP_CMD_SUBMIT:
+		pr_debug("USBIP_CMD_SUBMIT: x_flags %u x_len %u sf %u #p %d iv %d\n",
+			 pdu->u.cmd_submit.transfer_flags,
+			 pdu->u.cmd_submit.transfer_buffer_length,
+			 pdu->u.cmd_submit.start_frame,
+			 pdu->u.cmd_submit.number_of_packets,
+			 pdu->u.cmd_submit.interval);
+		break;
+	case USBIP_CMD_UNLINK:
+		pr_debug("USBIP_CMD_UNLINK: seq %u\n",
+			 pdu->u.cmd_unlink.seqnum);
+		break;
+	case USBIP_RET_SUBMIT:
+		pr_debug("USBIP_RET_SUBMIT: st %d al %u sf %d #p %d ec %d\n",
+			 pdu->u.ret_submit.status,
+			 pdu->u.ret_submit.actual_length,
+			 pdu->u.ret_submit.start_frame,
+			 pdu->u.ret_submit.number_of_packets,
+			 pdu->u.ret_submit.error_count);
+		break;
+	case USBIP_RET_UNLINK:
+		pr_debug("USBIP_RET_UNLINK: status %d\n",
+			 pdu->u.ret_unlink.status);
+		break;
+	default:
+		/* NOT REACHED */
+		pr_err("unknown command\n");
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(usbip_dump_header);
+
+/* Receive data over TCP/IP. */
+int usbip_recv(struct socket *sock, void *buf, int size)
+{
+	int result;
+	struct msghdr msg;
+	struct kvec iov;
+	int total = 0;
+
+	/* for blocks of if (usbip_dbg_flag_xmit) */
+	char *bp = buf;
+	int osize = size;
+
+	usbip_dbg_xmit("enter\n");
+
+	if (!sock || !buf || !size) {
+		pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
+		       size);
+		return -EINVAL;
+	}
+
+	do {
+		sock->sk->sk_allocation = GFP_NOIO;
+		iov.iov_base    = buf;
+		iov.iov_len     = size;
+		msg.msg_name    = NULL;
+		msg.msg_namelen = 0;
+		msg.msg_control = NULL;
+		msg.msg_controllen = 0;
+		msg.msg_flags      = MSG_NOSIGNAL;
+
+		result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL);
+		if (result <= 0) {
+			pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
+				 sock, buf, size, result, total);
+			goto err;
+		}
+
+		size -= result;
+		buf += result;
+		total += result;
+	} while (size > 0);
+
+	if (usbip_dbg_flag_xmit) {
+		if (!in_interrupt())
+			pr_debug("%-10s:", current->comm);
+		else
+			pr_debug("interrupt  :");
+
+		pr_debug("receiving....\n");
+		usbip_dump_buffer(bp, osize);
+		pr_debug("received, osize %d ret %d size %d total %d\n",
+			 osize, result, size, total);
+	}
+
+	return total;
+
+err:
+	return result;
+}
+EXPORT_SYMBOL_GPL(usbip_recv);
+
+/* there may be more cases to tweak the flags. */
+static unsigned int tweak_transfer_flags(unsigned int flags)
+{
+	flags &= ~URB_NO_TRANSFER_DMA_MAP;
+	return flags;
+}
+
+static void usbip_pack_cmd_submit(struct usbip_header *pdu, struct urb *urb,
+				  int pack)
+{
+	struct usbip_header_cmd_submit *spdu = &pdu->u.cmd_submit;
+
+	/*
+	 * Some members are not still implemented in usbip. I hope this issue
+	 * will be discussed when usbip is ported to other operating systems.
+	 */
+	if (pack) {
+		spdu->transfer_flags =
+			tweak_transfer_flags(urb->transfer_flags);
+		spdu->transfer_buffer_length	= urb->transfer_buffer_length;
+		spdu->start_frame		= urb->start_frame;
+		spdu->number_of_packets		= urb->number_of_packets;
+		spdu->interval			= urb->interval;
+	} else  {
+		urb->transfer_flags         = spdu->transfer_flags;
+		urb->transfer_buffer_length = spdu->transfer_buffer_length;
+		urb->start_frame            = spdu->start_frame;
+		urb->number_of_packets      = spdu->number_of_packets;
+		urb->interval               = spdu->interval;
+	}
+}
+
+static void usbip_pack_ret_submit(struct usbip_header *pdu, struct urb *urb,
+				  int pack)
+{
+	struct usbip_header_ret_submit *rpdu = &pdu->u.ret_submit;
+
+	if (pack) {
+		rpdu->status		= urb->status;
+		rpdu->actual_length	= urb->actual_length;
+		rpdu->start_frame	= urb->start_frame;
+		rpdu->number_of_packets = urb->number_of_packets;
+		rpdu->error_count	= urb->error_count;
+	} else {
+		urb->status		= rpdu->status;
+		urb->actual_length	= rpdu->actual_length;
+		urb->start_frame	= rpdu->start_frame;
+		urb->number_of_packets = rpdu->number_of_packets;
+		urb->error_count	= rpdu->error_count;
+	}
+}
+
+void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
+		    int pack)
+{
+	switch (cmd) {
+	case USBIP_CMD_SUBMIT:
+		usbip_pack_cmd_submit(pdu, urb, pack);
+		break;
+	case USBIP_RET_SUBMIT:
+		usbip_pack_ret_submit(pdu, urb, pack);
+		break;
+	default:
+		/* NOT REACHED */
+		pr_err("unknown command\n");
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(usbip_pack_pdu);
+
+static void correct_endian_basic(struct usbip_header_basic *base, int send)
+{
+	if (send) {
+		base->command	= cpu_to_be32(base->command);
+		base->seqnum	= cpu_to_be32(base->seqnum);
+		base->devid	= cpu_to_be32(base->devid);
+		base->direction	= cpu_to_be32(base->direction);
+		base->ep	= cpu_to_be32(base->ep);
+	} else {
+		base->command	= be32_to_cpu(base->command);
+		base->seqnum	= be32_to_cpu(base->seqnum);
+		base->devid	= be32_to_cpu(base->devid);
+		base->direction	= be32_to_cpu(base->direction);
+		base->ep	= be32_to_cpu(base->ep);
+	}
+}
+
+static void correct_endian_cmd_submit(struct usbip_header_cmd_submit *pdu,
+				      int send)
+{
+	if (send) {
+		pdu->transfer_flags = cpu_to_be32(pdu->transfer_flags);
+
+		cpu_to_be32s(&pdu->transfer_buffer_length);
+		cpu_to_be32s(&pdu->start_frame);
+		cpu_to_be32s(&pdu->number_of_packets);
+		cpu_to_be32s(&pdu->interval);
+	} else {
+		pdu->transfer_flags = be32_to_cpu(pdu->transfer_flags);
+
+		be32_to_cpus(&pdu->transfer_buffer_length);
+		be32_to_cpus(&pdu->start_frame);
+		be32_to_cpus(&pdu->number_of_packets);
+		be32_to_cpus(&pdu->interval);
+	}
+}
+
+static void correct_endian_ret_submit(struct usbip_header_ret_submit *pdu,
+				      int send)
+{
+	if (send) {
+		cpu_to_be32s(&pdu->status);
+		cpu_to_be32s(&pdu->actual_length);
+		cpu_to_be32s(&pdu->start_frame);
+		cpu_to_be32s(&pdu->number_of_packets);
+		cpu_to_be32s(&pdu->error_count);
+	} else {
+		be32_to_cpus(&pdu->status);
+		be32_to_cpus(&pdu->actual_length);
+		be32_to_cpus(&pdu->start_frame);
+		be32_to_cpus(&pdu->number_of_packets);
+		be32_to_cpus(&pdu->error_count);
+	}
+}
+
+static void correct_endian_cmd_unlink(struct usbip_header_cmd_unlink *pdu,
+				      int send)
+{
+	if (send)
+		pdu->seqnum = cpu_to_be32(pdu->seqnum);
+	else
+		pdu->seqnum = be32_to_cpu(pdu->seqnum);
+}
+
+static void correct_endian_ret_unlink(struct usbip_header_ret_unlink *pdu,
+				      int send)
+{
+	if (send)
+		cpu_to_be32s(&pdu->status);
+	else
+		be32_to_cpus(&pdu->status);
+}
+
+void usbip_header_correct_endian(struct usbip_header *pdu, int send)
+{
+	__u32 cmd = 0;
+
+	if (send)
+		cmd = pdu->base.command;
+
+	correct_endian_basic(&pdu->base, send);
+
+	if (!send)
+		cmd = pdu->base.command;
+
+	switch (cmd) {
+	case USBIP_CMD_SUBMIT:
+		correct_endian_cmd_submit(&pdu->u.cmd_submit, send);
+		break;
+	case USBIP_RET_SUBMIT:
+		correct_endian_ret_submit(&pdu->u.ret_submit, send);
+		break;
+	case USBIP_CMD_UNLINK:
+		correct_endian_cmd_unlink(&pdu->u.cmd_unlink, send);
+		break;
+	case USBIP_RET_UNLINK:
+		correct_endian_ret_unlink(&pdu->u.ret_unlink, send);
+		break;
+	default:
+		/* NOT REACHED */
+		pr_err("unknown command\n");
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(usbip_header_correct_endian);
+
+static void usbip_iso_packet_correct_endian(
+		struct usbip_iso_packet_descriptor *iso, int send)
+{
+	/* does not need all members. but copy all simply. */
+	if (send) {
+		iso->offset	= cpu_to_be32(iso->offset);
+		iso->length	= cpu_to_be32(iso->length);
+		iso->status	= cpu_to_be32(iso->status);
+		iso->actual_length = cpu_to_be32(iso->actual_length);
+	} else {
+		iso->offset	= be32_to_cpu(iso->offset);
+		iso->length	= be32_to_cpu(iso->length);
+		iso->status	= be32_to_cpu(iso->status);
+		iso->actual_length = be32_to_cpu(iso->actual_length);
+	}
+}
+
+static void usbip_pack_iso(struct usbip_iso_packet_descriptor *iso,
+			   struct usb_iso_packet_descriptor *uiso, int pack)
+{
+	if (pack) {
+		iso->offset		= uiso->offset;
+		iso->length		= uiso->length;
+		iso->status		= uiso->status;
+		iso->actual_length	= uiso->actual_length;
+	} else {
+		uiso->offset		= iso->offset;
+		uiso->length		= iso->length;
+		uiso->status		= iso->status;
+		uiso->actual_length	= iso->actual_length;
+	}
+}
+
+/* must free buffer */
+struct usbip_iso_packet_descriptor*
+usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen)
+{
+	struct usbip_iso_packet_descriptor *iso;
+	int np = urb->number_of_packets;
+	ssize_t size = np * sizeof(*iso);
+	int i;
+
+	iso = kzalloc(size, GFP_KERNEL);
+	if (!iso)
+		return NULL;
+
+	for (i = 0; i < np; i++) {
+		usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 1);
+		usbip_iso_packet_correct_endian(&iso[i], 1);
+	}
+
+	*bufflen = size;
+
+	return iso;
+}
+EXPORT_SYMBOL_GPL(usbip_alloc_iso_desc_pdu);
+
+/* some members of urb must be substituted before. */
+int usbip_recv_iso(struct usbip_device *ud, struct urb *urb)
+{
+	void *buff;
+	struct usbip_iso_packet_descriptor *iso;
+	int np = urb->number_of_packets;
+	int size = np * sizeof(*iso);
+	int i;
+	int ret;
+	int total_length = 0;
+
+	if (!usb_pipeisoc(urb->pipe))
+		return 0;
+
+	/* my Bluetooth dongle gets ISO URBs which are np = 0 */
+	if (np == 0)
+		return 0;
+
+	buff = kzalloc(size, GFP_KERNEL);
+	if (!buff)
+		return -ENOMEM;
+
+	ret = usbip_recv(ud->tcp_socket, buff, size);
+	if (ret != size) {
+		dev_err(&urb->dev->dev, "recv iso_frame_descriptor, %d\n",
+			ret);
+		kfree(buff);
+
+		if (ud->side == USBIP_STUB)
+			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		else
+			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+
+		return -EPIPE;
+	}
+
+	iso = (struct usbip_iso_packet_descriptor *) buff;
+	for (i = 0; i < np; i++) {
+		usbip_iso_packet_correct_endian(&iso[i], 0);
+		usbip_pack_iso(&iso[i], &urb->iso_frame_desc[i], 0);
+		total_length += urb->iso_frame_desc[i].actual_length;
+	}
+
+	kfree(buff);
+
+	if (total_length != urb->actual_length) {
+		dev_err(&urb->dev->dev,
+			"total length of iso packets %d not equal to actual length of buffer %d\n",
+			total_length, urb->actual_length);
+
+		if (ud->side == USBIP_STUB)
+			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		else
+			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+
+		return -EPIPE;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(usbip_recv_iso);
+
+/*
+ * This functions restores the padding which was removed for optimizing
+ * the bandwidth during transfer over tcp/ip
+ *
+ * buffer and iso packets need to be stored and be in propeper endian in urb
+ * before calling this function
+ */
+void usbip_pad_iso(struct usbip_device *ud, struct urb *urb)
+{
+	int np = urb->number_of_packets;
+	int i;
+	int actualoffset = urb->actual_length;
+
+	if (!usb_pipeisoc(urb->pipe))
+		return;
+
+	/* if no packets or length of data is 0, then nothing to unpack */
+	if (np == 0 || urb->actual_length == 0)
+		return;
+
+	/*
+	 * if actual_length is transfer_buffer_length then no padding is
+	 * present.
+	 */
+	if (urb->actual_length == urb->transfer_buffer_length)
+		return;
+
+	/*
+	 * loop over all packets from last to first (to prevent overwritting
+	 * memory when padding) and move them into the proper place
+	 */
+	for (i = np-1; i > 0; i--) {
+		actualoffset -= urb->iso_frame_desc[i].actual_length;
+		memmove(urb->transfer_buffer + urb->iso_frame_desc[i].offset,
+			urb->transfer_buffer + actualoffset,
+			urb->iso_frame_desc[i].actual_length);
+	}
+}
+EXPORT_SYMBOL_GPL(usbip_pad_iso);
+
+/* some members of urb must be substituted before. */
+int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb)
+{
+	int ret;
+	int size;
+
+	if (ud->side == USBIP_STUB) {
+		/* the direction of urb must be OUT. */
+		if (usb_pipein(urb->pipe))
+			return 0;
+
+		size = urb->transfer_buffer_length;
+	} else {
+		/* the direction of urb must be IN. */
+		if (usb_pipeout(urb->pipe))
+			return 0;
+
+		size = urb->actual_length;
+	}
+
+	/* no need to recv xbuff */
+	if (!(size > 0))
+		return 0;
+
+	ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size);
+	if (ret != size) {
+		dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret);
+		if (ud->side == USBIP_STUB) {
+			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+		} else {
+			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+			return -EPIPE;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(usbip_recv_xbuff);
+
+static int __init usbip_core_init(void)
+{
+	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
+	return 0;
+}
+
+static void __exit usbip_core_exit(void)
+{
+	return;
+}
+
+module_init(usbip_core_init);
+module_exit(usbip_core_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
new file mode 100644
index 000000000000..86b08475c254
--- /dev/null
+++ b/drivers/usb/usbip/usbip_common.h
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef __USBIP_COMMON_H
+#define __USBIP_COMMON_H
+
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/net.h>
+#include <linux/printk.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+#include <linux/wait.h>
+#include <uapi/linux/usbip.h>
+
+#define USBIP_VERSION "1.0.0"
+
+#undef pr_fmt
+
+#ifdef DEBUG
+#define pr_fmt(fmt)     KBUILD_MODNAME ": %s:%d: " fmt, __func__, __LINE__
+#else
+#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
+#endif
+
+enum {
+	usbip_debug_xmit	= (1 << 0),
+	usbip_debug_sysfs	= (1 << 1),
+	usbip_debug_urb		= (1 << 2),
+	usbip_debug_eh		= (1 << 3),
+
+	usbip_debug_stub_cmp	= (1 << 8),
+	usbip_debug_stub_dev	= (1 << 9),
+	usbip_debug_stub_rx	= (1 << 10),
+	usbip_debug_stub_tx	= (1 << 11),
+
+	usbip_debug_vhci_rh	= (1 << 8),
+	usbip_debug_vhci_hc	= (1 << 9),
+	usbip_debug_vhci_rx	= (1 << 10),
+	usbip_debug_vhci_tx	= (1 << 11),
+	usbip_debug_vhci_sysfs  = (1 << 12)
+};
+
+#define usbip_dbg_flag_xmit	(usbip_debug_flag & usbip_debug_xmit)
+#define usbip_dbg_flag_vhci_rh	(usbip_debug_flag & usbip_debug_vhci_rh)
+#define usbip_dbg_flag_vhci_hc	(usbip_debug_flag & usbip_debug_vhci_hc)
+#define usbip_dbg_flag_vhci_rx	(usbip_debug_flag & usbip_debug_vhci_rx)
+#define usbip_dbg_flag_vhci_tx	(usbip_debug_flag & usbip_debug_vhci_tx)
+#define usbip_dbg_flag_stub_rx	(usbip_debug_flag & usbip_debug_stub_rx)
+#define usbip_dbg_flag_stub_tx	(usbip_debug_flag & usbip_debug_stub_tx)
+#define usbip_dbg_flag_vhci_sysfs  (usbip_debug_flag & usbip_debug_vhci_sysfs)
+
+extern unsigned long usbip_debug_flag;
+extern struct device_attribute dev_attr_usbip_debug;
+
+#define usbip_dbg_with_flag(flag, fmt, args...)		\
+	do {						\
+		if (flag & usbip_debug_flag)		\
+			pr_debug(fmt, ##args);		\
+	} while (0)
+
+#define usbip_dbg_sysfs(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_sysfs, fmt , ##args)
+#define usbip_dbg_xmit(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_xmit, fmt , ##args)
+#define usbip_dbg_urb(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_urb, fmt , ##args)
+#define usbip_dbg_eh(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_eh, fmt , ##args)
+
+#define usbip_dbg_vhci_rh(fmt, args...)	\
+	usbip_dbg_with_flag(usbip_debug_vhci_rh, fmt , ##args)
+#define usbip_dbg_vhci_hc(fmt, args...)	\
+	usbip_dbg_with_flag(usbip_debug_vhci_hc, fmt , ##args)
+#define usbip_dbg_vhci_rx(fmt, args...)	\
+	usbip_dbg_with_flag(usbip_debug_vhci_rx, fmt , ##args)
+#define usbip_dbg_vhci_tx(fmt, args...)	\
+	usbip_dbg_with_flag(usbip_debug_vhci_tx, fmt , ##args)
+#define usbip_dbg_vhci_sysfs(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_vhci_sysfs, fmt , ##args)
+
+#define usbip_dbg_stub_cmp(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_stub_cmp, fmt , ##args)
+#define usbip_dbg_stub_rx(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_stub_rx, fmt , ##args)
+#define usbip_dbg_stub_tx(fmt, args...) \
+	usbip_dbg_with_flag(usbip_debug_stub_tx, fmt , ##args)
+
+/*
+ * USB/IP request headers
+ *
+ * Each request is transferred across the network to its counterpart, which
+ * facilitates the normal USB communication. The values contained in the headers
+ * are basically the same as in a URB. Currently, four request types are
+ * defined:
+ *
+ *  - USBIP_CMD_SUBMIT: a USB request block, corresponds to usb_submit_urb()
+ *    (client to server)
+ *
+ *  - USBIP_RET_SUBMIT: the result of USBIP_CMD_SUBMIT
+ *    (server to client)
+ *
+ *  - USBIP_CMD_UNLINK: an unlink request of a pending USBIP_CMD_SUBMIT,
+ *    corresponds to usb_unlink_urb()
+ *    (client to server)
+ *
+ *  - USBIP_RET_UNLINK: the result of USBIP_CMD_UNLINK
+ *    (server to client)
+ *
+ */
+#define USBIP_CMD_SUBMIT	0x0001
+#define USBIP_CMD_UNLINK	0x0002
+#define USBIP_RET_SUBMIT	0x0003
+#define USBIP_RET_UNLINK	0x0004
+
+#define USBIP_DIR_OUT	0x00
+#define USBIP_DIR_IN	0x01
+
+/**
+ * struct usbip_header_basic - data pertinent to every request
+ * @command: the usbip request type
+ * @seqnum: sequential number that identifies requests; incremented per
+ *	    connection
+ * @devid: specifies a remote USB device uniquely instead of busnum and devnum;
+ *	   in the stub driver, this value is ((busnum << 16) | devnum)
+ * @direction: direction of the transfer
+ * @ep: endpoint number
+ */
+struct usbip_header_basic {
+	__u32 command;
+	__u32 seqnum;
+	__u32 devid;
+	__u32 direction;
+	__u32 ep;
+} __packed;
+
+/**
+ * struct usbip_header_cmd_submit - USBIP_CMD_SUBMIT packet header
+ * @transfer_flags: URB flags
+ * @transfer_buffer_length: the data size for (in) or (out) transfer
+ * @start_frame: initial frame for isochronous or interrupt transfers
+ * @number_of_packets: number of isochronous packets
+ * @interval: maximum time for the request on the server-side host controller
+ * @setup: setup data for a control request
+ */
+struct usbip_header_cmd_submit {
+	__u32 transfer_flags;
+	__s32 transfer_buffer_length;
+
+	/* it is difficult for usbip to sync frames (reserved only?) */
+	__s32 start_frame;
+	__s32 number_of_packets;
+	__s32 interval;
+
+	unsigned char setup[8];
+} __packed;
+
+/**
+ * struct usbip_header_ret_submit - USBIP_RET_SUBMIT packet header
+ * @status: return status of a non-iso request
+ * @actual_length: number of bytes transferred
+ * @start_frame: initial frame for isochronous or interrupt transfers
+ * @number_of_packets: number of isochronous packets
+ * @error_count: number of errors for isochronous transfers
+ */
+struct usbip_header_ret_submit {
+	__s32 status;
+	__s32 actual_length;
+	__s32 start_frame;
+	__s32 number_of_packets;
+	__s32 error_count;
+} __packed;
+
+/**
+ * struct usbip_header_cmd_unlink - USBIP_CMD_UNLINK packet header
+ * @seqnum: the URB seqnum to unlink
+ */
+struct usbip_header_cmd_unlink {
+	__u32 seqnum;
+} __packed;
+
+/**
+ * struct usbip_header_ret_unlink - USBIP_RET_UNLINK packet header
+ * @status: return status of the request
+ */
+struct usbip_header_ret_unlink {
+	__s32 status;
+} __packed;
+
+/**
+ * struct usbip_header - common header for all usbip packets
+ * @base: the basic header
+ * @u: packet type dependent header
+ */
+struct usbip_header {
+	struct usbip_header_basic base;
+
+	union {
+		struct usbip_header_cmd_submit	cmd_submit;
+		struct usbip_header_ret_submit	ret_submit;
+		struct usbip_header_cmd_unlink	cmd_unlink;
+		struct usbip_header_ret_unlink	ret_unlink;
+	} u;
+} __packed;
+
+/*
+ * This is the same as usb_iso_packet_descriptor but packed for pdu.
+ */
+struct usbip_iso_packet_descriptor {
+	__u32 offset;
+	__u32 length;			/* expected length */
+	__u32 actual_length;
+	__u32 status;
+} __packed;
+
+enum usbip_side {
+	USBIP_VHCI,
+	USBIP_STUB,
+};
+
+/* event handler */
+#define USBIP_EH_SHUTDOWN	(1 << 0)
+#define USBIP_EH_BYE		(1 << 1)
+#define USBIP_EH_RESET		(1 << 2)
+#define USBIP_EH_UNUSABLE	(1 << 3)
+
+#define SDEV_EVENT_REMOVED   (USBIP_EH_SHUTDOWN | USBIP_EH_RESET | USBIP_EH_BYE)
+#define	SDEV_EVENT_DOWN		(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
+#define	SDEV_EVENT_ERROR_TCP	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
+#define	SDEV_EVENT_ERROR_SUBMIT	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
+#define	SDEV_EVENT_ERROR_MALLOC	(USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
+
+#define	VDEV_EVENT_REMOVED	(USBIP_EH_SHUTDOWN | USBIP_EH_BYE)
+#define	VDEV_EVENT_DOWN		(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
+#define	VDEV_EVENT_ERROR_TCP	(USBIP_EH_SHUTDOWN | USBIP_EH_RESET)
+#define	VDEV_EVENT_ERROR_MALLOC	(USBIP_EH_SHUTDOWN | USBIP_EH_UNUSABLE)
+
+/* a common structure for stub_device and vhci_device */
+struct usbip_device {
+	enum usbip_side side;
+	enum usbip_device_status status;
+
+	/* lock for status */
+	spinlock_t lock;
+
+	struct socket *tcp_socket;
+
+	struct task_struct *tcp_rx;
+	struct task_struct *tcp_tx;
+
+	unsigned long event;
+	struct task_struct *eh;
+	wait_queue_head_t eh_waitq;
+
+	struct eh_ops {
+		void (*shutdown)(struct usbip_device *);
+		void (*reset)(struct usbip_device *);
+		void (*unusable)(struct usbip_device *);
+	} eh_ops;
+};
+
+#define kthread_get_run(threadfn, data, namefmt, ...)			   \
+({									   \
+	struct task_struct *__k						   \
+		= kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
+	if (!IS_ERR(__k)) {						   \
+		get_task_struct(__k);					   \
+		wake_up_process(__k);					   \
+	}								   \
+	__k;								   \
+})
+
+#define kthread_stop_put(k)		\
+	do {				\
+		kthread_stop(k);	\
+		put_task_struct(k);	\
+	} while (0)
+
+/* usbip_common.c */
+void usbip_dump_urb(struct urb *purb);
+void usbip_dump_header(struct usbip_header *pdu);
+
+int usbip_recv(struct socket *sock, void *buf, int size);
+
+void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
+		    int pack);
+void usbip_header_correct_endian(struct usbip_header *pdu, int send);
+
+struct usbip_iso_packet_descriptor*
+usbip_alloc_iso_desc_pdu(struct urb *urb, ssize_t *bufflen);
+
+/* some members of urb must be substituted before. */
+int usbip_recv_iso(struct usbip_device *ud, struct urb *urb);
+void usbip_pad_iso(struct usbip_device *ud, struct urb *urb);
+int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb);
+
+/* usbip_event.c */
+int usbip_start_eh(struct usbip_device *ud);
+void usbip_stop_eh(struct usbip_device *ud);
+void usbip_event_add(struct usbip_device *ud, unsigned long event);
+int usbip_event_happened(struct usbip_device *ud);
+
+static inline int interface_to_busnum(struct usb_interface *interface)
+{
+	struct usb_device *udev = interface_to_usbdev(interface);
+
+	return udev->bus->busnum;
+}
+
+static inline int interface_to_devnum(struct usb_interface *interface)
+{
+	struct usb_device *udev = interface_to_usbdev(interface);
+
+	return udev->devnum;
+}
+
+#endif /* __USBIP_COMMON_H */
diff --git a/drivers/usb/usbip/usbip_event.c b/drivers/usb/usbip/usbip_event.c
new file mode 100644
index 000000000000..64933b993d7a
--- /dev/null
+++ b/drivers/usb/usbip/usbip_event.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/export.h>
+
+#include "usbip_common.h"
+
+static int event_handler(struct usbip_device *ud)
+{
+	usbip_dbg_eh("enter\n");
+
+	/*
+	 * Events are handled by only this thread.
+	 */
+	while (usbip_event_happened(ud)) {
+		usbip_dbg_eh("pending event %lx\n", ud->event);
+
+		/*
+		 * NOTE: shutdown must come first.
+		 * Shutdown the device.
+		 */
+		if (ud->event & USBIP_EH_SHUTDOWN) {
+			ud->eh_ops.shutdown(ud);
+			ud->event &= ~USBIP_EH_SHUTDOWN;
+		}
+
+		/* Reset the device. */
+		if (ud->event & USBIP_EH_RESET) {
+			ud->eh_ops.reset(ud);
+			ud->event &= ~USBIP_EH_RESET;
+		}
+
+		/* Mark the device as unusable. */
+		if (ud->event & USBIP_EH_UNUSABLE) {
+			ud->eh_ops.unusable(ud);
+			ud->event &= ~USBIP_EH_UNUSABLE;
+		}
+
+		/* Stop the error handler. */
+		if (ud->event & USBIP_EH_BYE)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int event_handler_loop(void *data)
+{
+	struct usbip_device *ud = data;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(ud->eh_waitq,
+					 usbip_event_happened(ud) ||
+					 kthread_should_stop());
+		usbip_dbg_eh("wakeup\n");
+
+		if (event_handler(ud) < 0)
+			break;
+	}
+
+	return 0;
+}
+
+int usbip_start_eh(struct usbip_device *ud)
+{
+	init_waitqueue_head(&ud->eh_waitq);
+	ud->event = 0;
+
+	ud->eh = kthread_run(event_handler_loop, ud, "usbip_eh");
+	if (IS_ERR(ud->eh)) {
+		pr_warn("Unable to start control thread\n");
+		return PTR_ERR(ud->eh);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usbip_start_eh);
+
+void usbip_stop_eh(struct usbip_device *ud)
+{
+	if (ud->eh == current)
+		return; /* do not wait for myself */
+
+	kthread_stop(ud->eh);
+	usbip_dbg_eh("usbip_eh has finished\n");
+}
+EXPORT_SYMBOL_GPL(usbip_stop_eh);
+
+void usbip_event_add(struct usbip_device *ud, unsigned long event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ud->lock, flags);
+	ud->event |= event;
+	wake_up(&ud->eh_waitq);
+	spin_unlock_irqrestore(&ud->lock, flags);
+}
+EXPORT_SYMBOL_GPL(usbip_event_add);
+
+int usbip_event_happened(struct usbip_device *ud)
+{
+	int happened = 0;
+
+	spin_lock(&ud->lock);
+	if (ud->event != 0)
+		happened = 1;
+	spin_unlock(&ud->lock);
+
+	return happened;
+}
+EXPORT_SYMBOL_GPL(usbip_event_happened);
diff --git a/drivers/usb/usbip/usbip_protocol.txt b/drivers/usb/usbip/usbip_protocol.txt
new file mode 100644
index 000000000000..16b6fe27284c
--- /dev/null
+++ b/drivers/usb/usbip/usbip_protocol.txt
@@ -0,0 +1,358 @@
+PRELIMINARY DRAFT, MAY CONTAIN MISTAKES!
+28 Jun 2011
+
+The USB/IP protocol follows a server/client architecture. The server exports the
+USB devices and the clients imports them. The device driver for the exported
+USB device runs on the client machine.
+
+The client may ask for the list of the exported USB devices. To get the list the
+client opens a TCP/IP connection towards the server, and sends an OP_REQ_DEVLIST
+packet on top of the TCP/IP connection (so the actual OP_REQ_DEVLIST may be sent
+in one or more pieces at the low level transport layer). The server sends back
+the OP_REP_DEVLIST packet which lists the exported USB devices. Finally the
+TCP/IP connection is closed.
+
+ virtual host controller                                 usb host
+      "client"                                           "server"
+  (imports USB devices)                             (exports USB devices)
+          |                                                 |
+          |                  OP_REQ_DEVLIST                 |
+          | ----------------------------------------------> |
+          |                                                 |
+          |                  OP_REP_DEVLIST                 |
+          | <---------------------------------------------- |
+          |                                                 |
+
+Once the client knows the list of exported USB devices it may decide to use one
+of them. First the client opens a TCP/IP connection towards the server and
+sends an OP_REQ_IMPORT packet. The server replies with OP_REP_IMPORT. If the
+import was successful the TCP/IP connection remains open and will be used
+to transfer the URB traffic between the client and the server. The client may
+send two types of packets: the USBIP_CMD_SUBMIT to submit an URB, and
+USBIP_CMD_UNLINK to unlink a previously submitted URB. The answers of the
+server may be USBIP_RET_SUBMIT and USBIP_RET_UNLINK respectively.
+
+ virtual host controller                                 usb host
+      "client"                                           "server"
+  (imports USB devices)                             (exports USB devices)
+          |                                                 |
+          |                  OP_REQ_IMPORT                  |
+          | ----------------------------------------------> |
+          |                                                 |
+          |                  OP_REP_IMPORT                  |
+          | <---------------------------------------------- |
+          |                                                 |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = n)         |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = n)         |
+          | <---------------------------------------------- |
+          |                        .                        |
+          |                        :                        |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m)         |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+1)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+2)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m)         |
+          | <---------------------------------------------- |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+3)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m+1)       |
+          | <---------------------------------------------- |
+          |                                                 |
+          |            USBIP_CMD_SUBMIT(seqnum = m+4)       |
+          | ----------------------------------------------> |
+          |                                                 |
+          |            USBIP_RET_SUBMIT(seqnum = m+2)       |
+          | <---------------------------------------------- |
+          |                        .                        |
+          |                        :                        |
+          |                                                 |
+          |               USBIP_CMD_UNLINK                  |
+          | ----------------------------------------------> |
+          |                                                 |
+          |               USBIP_RET_UNLINK                  |
+          | <---------------------------------------------- |
+          |                                                 |
+
+The fields are in network (big endian) byte order meaning that the most significant
+byte (MSB) is stored at the lowest address.
+
+
+OP_REQ_DEVLIST: Retrieve the list of exported USB devices.
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
+-----------+--------+------------+---------------------------------------------------
+ 2         | 2      | 0x8005     | Command code: Retrieve the list of exported USB
+           |        |            |   devices.
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      | 0x00000000 | Status: unused, shall be set to 0
+
+OP_REP_DEVLIST: Reply with the list of exported USB devices.
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0.
+-----------+--------+------------+---------------------------------------------------
+ 2         | 2      | 0x0005     | Reply code: The list of exported USB devices.
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      | 0x00000000 | Status: 0 for OK
+-----------+--------+------------+---------------------------------------------------
+ 8         | 4      | n          | Number of exported devices: 0 means no exported
+           |        |            |   devices.
+-----------+--------+------------+---------------------------------------------------
+ 0x0C      |        |            | From now on the exported n devices are described,
+           |        |            |   if any. If no devices are exported the message
+           |        |            |   ends with the previous "number of exported
+           |        |            |   devices" field.
+-----------+--------+------------+---------------------------------------------------
+           | 256    |            | path: Path of the device on the host exporting the
+           |        |            |   USB device, string closed with zero byte, e.g.
+           |        |            |   "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"
+           |        |            |   The unused bytes shall be filled with zero
+           |        |            |   bytes.
+-----------+--------+------------+---------------------------------------------------
+ 0x10C     | 32     |            | busid: Bus ID of the exported device, string
+           |        |            |   closed with zero byte, e.g. "3-2". The unused
+           |        |            |   bytes shall be filled with zero bytes.
+-----------+--------+------------+---------------------------------------------------
+ 0x12C     | 4      |            | busnum
+-----------+--------+------------+---------------------------------------------------
+ 0x130     | 4      |            | devnum
+-----------+--------+------------+---------------------------------------------------
+ 0x134     | 4      |            | speed
+-----------+--------+------------+---------------------------------------------------
+ 0x138     | 2      |            | idVendor
+-----------+--------+------------+---------------------------------------------------
+ 0x13A     | 2      |            | idProduct
+-----------+--------+------------+---------------------------------------------------
+ 0x13C     | 2      |            | bcdDevice
+-----------+--------+------------+---------------------------------------------------
+ 0x13E     | 1      |            | bDeviceClass
+-----------+--------+------------+---------------------------------------------------
+ 0x13F     | 1      |            | bDeviceSubClass
+-----------+--------+------------+---------------------------------------------------
+ 0x140     | 1      |            | bDeviceProtocol
+-----------+--------+------------+---------------------------------------------------
+ 0x141     | 1      |            | bConfigurationValue
+-----------+--------+------------+---------------------------------------------------
+ 0x142     | 1      |            | bNumConfigurations
+-----------+--------+------------+---------------------------------------------------
+ 0x143     | 1      |            | bNumInterfaces
+-----------+--------+------------+---------------------------------------------------
+ 0x144     |        | m_0        | From now on each interface is described, all
+           |        |            |   together bNumInterfaces times, with the
+           |        |            |   the following 4 fields:
+-----------+--------+------------+---------------------------------------------------
+           | 1      |            | bInterfaceClass
+-----------+--------+------------+---------------------------------------------------
+ 0x145     | 1      |            | bInterfaceSubClass
+-----------+--------+------------+---------------------------------------------------
+ 0x146     | 1      |            | bInterfaceProtocol
+-----------+--------+------------+---------------------------------------------------
+ 0x147     | 1      |            | padding byte for alignment, shall be set to zero
+-----------+--------+------------+---------------------------------------------------
+ 0xC +     |        |            | The second exported USB device starts at i=1
+ i*0x138 + |        |            | with the busid field.
+ m_(i-1)*4 |        |            |
+
+OP_REQ_IMPORT: Request to import (attach) a remote USB device.
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
+-----------+--------+------------+---------------------------------------------------
+ 2         | 2      | 0x8003     | Command code: import a remote USB device.
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      | 0x00000000 | Status: unused, shall be set to 0
+-----------+--------+------------+---------------------------------------------------
+ 8         | 32     |            | busid: the busid of the exported device on the
+           |        |            |   remote host. The possible values are taken
+           |        |            |   from the message field OP_REP_DEVLIST.busid.
+           |        |            |   A string closed with zero, the unused bytes
+           |        |            |   shall be filled with zeros.
+-----------+--------+------------+---------------------------------------------------
+
+OP_REP_IMPORT: Reply to import (attach) a remote USB device.
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 2      | 0x0100     | Binary-coded decimal USBIP version number: v1.0.0
+-----------+--------+------------+---------------------------------------------------
+ 2         | 2      | 0x0003     | Reply code: Reply to import.
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      | 0x00000000 | Status: 0 for OK
+           |        |            |         1 for error
+-----------+--------+------------+---------------------------------------------------
+ 8         |        |            | From now on comes the details of the imported
+           |        |            |   device, if the previous status field was OK (0),
+           |        |            |   otherwise the reply ends with the status field.
+-----------+--------+------------+---------------------------------------------------
+           | 256    |            | path: Path of the device on the host exporting the
+           |        |            |   USB device, string closed with zero byte, e.g.
+           |        |            |   "/sys/devices/pci0000:00/0000:00:1d.1/usb3/3-2"
+           |        |            |   The unused bytes shall be filled with zero
+           |        |            |   bytes.
+-----------+--------+------------+---------------------------------------------------
+ 0x108     | 32     |            | busid: Bus ID of the exported device, string
+           |        |            |   closed with zero byte, e.g. "3-2". The unused
+           |        |            |   bytes shall be filled with zero bytes.
+-----------+--------+------------+---------------------------------------------------
+ 0x128     | 4      |            | busnum
+-----------+--------+------------+---------------------------------------------------
+ 0x12C     | 4      |            | devnum
+-----------+--------+------------+---------------------------------------------------
+ 0x130     | 4      |            | speed
+-----------+--------+------------+---------------------------------------------------
+ 0x134     | 2      |            | idVendor
+-----------+--------+------------+---------------------------------------------------
+ 0x136     | 2      |            | idProduct
+-----------+--------+------------+---------------------------------------------------
+ 0x138     | 2      |            | bcdDevice
+-----------+--------+------------+---------------------------------------------------
+ 0x139     | 1      |            | bDeviceClass
+-----------+--------+------------+---------------------------------------------------
+ 0x13A     | 1      |            | bDeviceSubClass
+-----------+--------+------------+---------------------------------------------------
+ 0x13B     | 1      |            | bDeviceProtocol
+-----------+--------+------------+---------------------------------------------------
+ 0x13C     | 1      |            | bConfigurationValue
+-----------+--------+------------+---------------------------------------------------
+ 0x13D     | 1      |            | bNumConfigurations
+-----------+--------+------------+---------------------------------------------------
+ 0x13E     | 1      |            | bNumInterfaces
+
+USBIP_CMD_SUBMIT: Submit an URB
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 4      | 0x00000001 | command: Submit an URB
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      |            | seqnum: the sequence number of the URB to submit
+-----------+--------+------------+---------------------------------------------------
+ 8         | 4      |            | devid
+-----------+--------+------------+---------------------------------------------------
+ 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
+           |        |            |            1: USBIP_DIR_IN
+-----------+--------+------------+---------------------------------------------------
+ 0x10      | 4      |            | ep: endpoint number, possible values are: 0...15
+-----------+--------+------------+---------------------------------------------------
+ 0x14      | 4      |            | transfer_flags: possible values depend on the
+           |        |            |   URB transfer type, see below
+-----------+--------+------------+---------------------------------------------------
+ 0x18      | 4      |            | transfer_buffer_length
+-----------+--------+------------+---------------------------------------------------
+ 0x1C      | 4      |            | start_frame: specify the selected frame to
+           |        |            |   transmit an ISO frame, ignored if URB_ISO_ASAP
+           |        |            |   is specified at transfer_flags
+-----------+--------+------------+---------------------------------------------------
+ 0x20      | 4      |            | number_of_packets: number of ISO packets
+-----------+--------+------------+---------------------------------------------------
+ 0x24      | 4      |            | interval: maximum time for the request on the
+           |        |            |   server-side host controller
+-----------+--------+------------+---------------------------------------------------
+ 0x28      | 8      |            | setup: data bytes for USB setup, filled with
+           |        |            |   zeros if not used
+-----------+--------+------------+---------------------------------------------------
+ 0x30      |        |            | URB data. For ISO transfers the padding between
+           |        |            |   each ISO packets is not transmitted.
+
+
+  Allowed transfer_flags  | value      | control | interrupt | bulk     | isochronous
+ -------------------------+------------+---------+-----------+----------+-------------
+  URB_SHORT_NOT_OK        | 0x00000001 | only in | only in   | only in  | no
+  URB_ISO_ASAP            | 0x00000002 | no      | no        | no       | yes
+  URB_NO_TRANSFER_DMA_MAP | 0x00000004 | yes     | yes       | yes      | yes
+  URB_NO_FSBR             | 0x00000020 | yes     | no        | no       | no
+  URB_ZERO_PACKET         | 0x00000040 | no      | no        | only out | no
+  URB_NO_INTERRUPT        | 0x00000080 | yes     | yes       | yes      | yes
+  URB_FREE_BUFFER         | 0x00000100 | yes     | yes       | yes      | yes
+  URB_DIR_MASK            | 0x00000200 | yes     | yes       | yes      | yes
+
+
+USBIP_RET_SUBMIT: Reply for submitting an URB
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 4      | 0x00000003 | command
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      |            | seqnum: URB sequence number
+-----------+--------+------------+---------------------------------------------------
+ 8         | 4      |            | devid
+-----------+--------+------------+---------------------------------------------------
+ 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
+           |        |            |            1: USBIP_DIR_IN
+-----------+--------+------------+---------------------------------------------------
+ 0x10      | 4      |            | ep: endpoint number
+-----------+--------+------------+---------------------------------------------------
+ 0x14      | 4      |            | status: zero for successful URB transaction,
+           |        |            |   otherwise some kind of error happened.
+-----------+--------+------------+---------------------------------------------------
+ 0x18      | 4      | n          | actual_length: number of URB data bytes
+-----------+--------+------------+---------------------------------------------------
+ 0x1C      | 4      |            | start_frame: for an ISO frame the actually
+           |        |            |   selected frame for transmit.
+-----------+--------+------------+---------------------------------------------------
+ 0x20      | 4      |            | number_of_packets
+-----------+--------+------------+---------------------------------------------------
+ 0x24      | 4      |            | error_count
+-----------+--------+------------+---------------------------------------------------
+ 0x28      | 8      |            | setup: data bytes for USB setup, filled with
+           |        |            |   zeros if not used
+-----------+--------+------------+---------------------------------------------------
+ 0x30      | n      |            | URB data bytes. For ISO transfers the padding
+           |        |            |   between each ISO packets is not transmitted.
+
+USBIP_CMD_UNLINK: Unlink an URB
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 4      | 0x00000002 | command: URB unlink command
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      |            | seqnum: URB sequence number to unlink: FIXME: is this so?
+-----------+--------+------------+---------------------------------------------------
+ 8         | 4      |            | devid
+-----------+--------+------------+---------------------------------------------------
+ 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
+           |        |            |            1: USBIP_DIR_IN
+-----------+--------+------------+---------------------------------------------------
+ 0x10      | 4      |            | ep: endpoint number: zero
+-----------+--------+------------+---------------------------------------------------
+ 0x14      | 4      |            | seqnum: the URB sequence number given previously
+           |        |            |   at USBIP_CMD_SUBMIT.seqnum field
+-----------+--------+------------+---------------------------------------------------
+ 0x30      | n      |            | URB data bytes. For ISO transfers the padding
+           |        |            |   between each ISO packets is not transmitted.
+
+USBIP_RET_UNLINK: Reply for URB unlink
+
+ Offset    | Length | Value      | Description
+-----------+--------+------------+---------------------------------------------------
+ 0         | 4      | 0x00000004 | command: reply for the URB unlink command
+-----------+--------+------------+---------------------------------------------------
+ 4         | 4      |            | seqnum: the unlinked URB sequence number
+-----------+--------+------------+---------------------------------------------------
+ 8         | 4      |            | devid
+-----------+--------+------------+---------------------------------------------------
+ 0xC       | 4      |            | direction: 0: USBIP_DIR_OUT
+           |        |            |            1: USBIP_DIR_IN
+-----------+--------+------------+---------------------------------------------------
+ 0x10      | 4      |            | ep: endpoint number
+-----------+--------+------------+---------------------------------------------------
+ 0x14      | 4      |            | status: This is the value contained in the
+           |        |            |   urb->status in the URB completition handler.
+           |        |            |   FIXME: a better explanation needed.
+-----------+--------+------------+---------------------------------------------------
+ 0x30      | n      |            | URB data bytes. For ISO transfers the padding
+           |        |            |   between each ISO packets is not transmitted.
diff --git a/drivers/usb/usbip/vhci.h b/drivers/usb/usbip/vhci.h
new file mode 100644
index 000000000000..a863a98a91ce
--- /dev/null
+++ b/drivers/usb/usbip/vhci.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef __USBIP_VHCI_H
+#define __USBIP_VHCI_H
+
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+#include <linux/wait.h>
+
+struct vhci_device {
+	struct usb_device *udev;
+
+	/*
+	 * devid specifies a remote usb device uniquely instead
+	 * of combination of busnum and devnum.
+	 */
+	__u32 devid;
+
+	/* speed of a remote device */
+	enum usb_device_speed speed;
+
+	/* vhci root-hub port to which this device is attached */
+	__u32 rhport;
+
+	struct usbip_device ud;
+
+	/* lock for the below link lists */
+	spinlock_t priv_lock;
+
+	/* vhci_priv is linked to one of them. */
+	struct list_head priv_tx;
+	struct list_head priv_rx;
+
+	/* vhci_unlink is linked to one of them */
+	struct list_head unlink_tx;
+	struct list_head unlink_rx;
+
+	/* vhci_tx thread sleeps for this queue */
+	wait_queue_head_t waitq_tx;
+};
+
+/* urb->hcpriv, use container_of() */
+struct vhci_priv {
+	unsigned long seqnum;
+	struct list_head list;
+
+	struct vhci_device *vdev;
+	struct urb *urb;
+};
+
+struct vhci_unlink {
+	/* seqnum of this request */
+	unsigned long seqnum;
+
+	struct list_head list;
+
+	/* seqnum of the unlink target */
+	unsigned long unlink_seqnum;
+};
+
+/* Number of supported ports. Value has an upperbound of USB_MAXCHILDREN */
+#define VHCI_NPORTS 8
+
+/* for usb_bus.hcpriv */
+struct vhci_hcd {
+	spinlock_t lock;
+
+	u32 port_status[VHCI_NPORTS];
+
+	unsigned resuming:1;
+	unsigned long re_timeout;
+
+	atomic_t seqnum;
+
+	/*
+	 * NOTE:
+	 * wIndex shows the port number and begins from 1.
+	 * But, the index of this array begins from 0.
+	 */
+	struct vhci_device vdev[VHCI_NPORTS];
+};
+
+extern struct vhci_hcd *the_controller;
+extern const struct attribute_group dev_attr_group;
+
+/* vhci_hcd.c */
+void rh_port_connect(int rhport, enum usb_device_speed speed);
+
+/* vhci_rx.c */
+struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum);
+int vhci_rx_loop(void *data);
+
+/* vhci_tx.c */
+int vhci_tx_loop(void *data);
+
+static inline struct vhci_device *port_to_vdev(__u32 port)
+{
+	return &the_controller->vdev[port];
+}
+
+static inline struct vhci_hcd *hcd_to_vhci(struct usb_hcd *hcd)
+{
+	return (struct vhci_hcd *) (hcd->hcd_priv);
+}
+
+static inline struct usb_hcd *vhci_to_hcd(struct vhci_hcd *vhci)
+{
+	return container_of((void *) vhci, struct usb_hcd, hcd_priv);
+}
+
+static inline struct device *vhci_dev(struct vhci_hcd *vhci)
+{
+	return vhci_to_hcd(vhci)->self.controller;
+}
+
+#endif /* __USBIP_VHCI_H */
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
new file mode 100644
index 000000000000..c02374b6049c
--- /dev/null
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -0,0 +1,1171 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/init.h>
+#include <linux/file.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "usbip_common.h"
+#include "vhci.h"
+
+#define DRIVER_AUTHOR "Takahiro Hirofuchi"
+#define DRIVER_DESC "USB/IP 'Virtual' Host Controller (VHCI) Driver"
+
+/*
+ * TODO
+ *	- update root hub emulation
+ *	- move the emulation code to userland ?
+ *		porting to other operating systems
+ *		minimize kernel code
+ *	- add suspend/resume code
+ *	- clean up everything
+ */
+
+/* See usb gadget dummy hcd */
+
+static int vhci_hub_status(struct usb_hcd *hcd, char *buff);
+static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
+			    u16 wIndex, char *buff, u16 wLength);
+static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
+			    gfp_t mem_flags);
+static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status);
+static int vhci_start(struct usb_hcd *vhci_hcd);
+static void vhci_stop(struct usb_hcd *hcd);
+static int vhci_get_frame_number(struct usb_hcd *hcd);
+
+static const char driver_name[] = "vhci_hcd";
+static const char driver_desc[] = "USB/IP Virtual Host Controller";
+
+struct vhci_hcd *the_controller;
+
+static const char * const bit_desc[] = {
+	"CONNECTION",		/*0*/
+	"ENABLE",		/*1*/
+	"SUSPEND",		/*2*/
+	"OVER_CURRENT",		/*3*/
+	"RESET",		/*4*/
+	"R5",			/*5*/
+	"R6",			/*6*/
+	"R7",			/*7*/
+	"POWER",		/*8*/
+	"LOWSPEED",		/*9*/
+	"HIGHSPEED",		/*10*/
+	"PORT_TEST",		/*11*/
+	"INDICATOR",		/*12*/
+	"R13",			/*13*/
+	"R14",			/*14*/
+	"R15",			/*15*/
+	"C_CONNECTION",		/*16*/
+	"C_ENABLE",		/*17*/
+	"C_SUSPEND",		/*18*/
+	"C_OVER_CURRENT",	/*19*/
+	"C_RESET",		/*20*/
+	"R21",			/*21*/
+	"R22",			/*22*/
+	"R23",			/*23*/
+	"R24",			/*24*/
+	"R25",			/*25*/
+	"R26",			/*26*/
+	"R27",			/*27*/
+	"R28",			/*28*/
+	"R29",			/*29*/
+	"R30",			/*30*/
+	"R31",			/*31*/
+};
+
+static void dump_port_status_diff(u32 prev_status, u32 new_status)
+{
+	int i = 0;
+	u32 bit = 1;
+
+	pr_debug("status prev -> new: %08x -> %08x\n", prev_status, new_status);
+	while (bit) {
+		u32 prev = prev_status & bit;
+		u32 new = new_status & bit;
+		char change;
+
+		if (!prev && new)
+			change = '+';
+		else if (prev && !new)
+			change = '-';
+		else
+			change = ' ';
+
+		if (prev || new)
+			pr_debug(" %c%s\n", change, bit_desc[i]);
+		bit <<= 1;
+		i++;
+	}
+	pr_debug("\n");
+}
+
+void rh_port_connect(int rhport, enum usb_device_speed speed)
+{
+	usbip_dbg_vhci_rh("rh_port_connect %d\n", rhport);
+
+	spin_lock(&the_controller->lock);
+
+	the_controller->port_status[rhport] |= USB_PORT_STAT_CONNECTION
+		| (1 << USB_PORT_FEAT_C_CONNECTION);
+
+	switch (speed) {
+	case USB_SPEED_HIGH:
+		the_controller->port_status[rhport] |= USB_PORT_STAT_HIGH_SPEED;
+		break;
+	case USB_SPEED_LOW:
+		the_controller->port_status[rhport] |= USB_PORT_STAT_LOW_SPEED;
+		break;
+	default:
+		break;
+	}
+
+	spin_unlock(&the_controller->lock);
+
+	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
+}
+
+static void rh_port_disconnect(int rhport)
+{
+	usbip_dbg_vhci_rh("rh_port_disconnect %d\n", rhport);
+
+	spin_lock(&the_controller->lock);
+
+	the_controller->port_status[rhport] &= ~USB_PORT_STAT_CONNECTION;
+	the_controller->port_status[rhport] |=
+					(1 << USB_PORT_FEAT_C_CONNECTION);
+
+	spin_unlock(&the_controller->lock);
+	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
+}
+
+#define PORT_C_MASK				\
+	((USB_PORT_STAT_C_CONNECTION		\
+	  | USB_PORT_STAT_C_ENABLE		\
+	  | USB_PORT_STAT_C_SUSPEND		\
+	  | USB_PORT_STAT_C_OVERCURRENT		\
+	  | USB_PORT_STAT_C_RESET) << 16)
+
+/*
+ * Returns 0 if the status hasn't changed, or the number of bytes in buf.
+ * Ports are 0-indexed from the HCD point of view,
+ * and 1-indexed from the USB core pointer of view.
+ *
+ * @buf: a bitmap to show which port status has been changed.
+ *  bit  0: reserved
+ *  bit  1: the status of port 0 has been changed.
+ *  bit  2: the status of port 1 has been changed.
+ *  ...
+ */
+static int vhci_hub_status(struct usb_hcd *hcd, char *buf)
+{
+	struct vhci_hcd	*vhci;
+	int		retval;
+	int		rhport;
+	int		changed = 0;
+
+	retval = DIV_ROUND_UP(VHCI_NPORTS + 1, 8);
+	memset(buf, 0, retval);
+
+	vhci = hcd_to_vhci(hcd);
+
+	spin_lock(&vhci->lock);
+	if (!HCD_HW_ACCESSIBLE(hcd)) {
+		usbip_dbg_vhci_rh("hw accessible flag not on?\n");
+		goto done;
+	}
+
+	/* check pseudo status register for each port */
+	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+		if ((vhci->port_status[rhport] & PORT_C_MASK)) {
+			/* The status of a port has been changed, */
+			usbip_dbg_vhci_rh("port %d status changed\n", rhport);
+
+			buf[(rhport + 1) / 8] |= 1 << (rhport + 1) % 8;
+			changed = 1;
+		}
+	}
+
+	if ((hcd->state == HC_STATE_SUSPENDED) && (changed == 1))
+		usb_hcd_resume_root_hub(hcd);
+
+done:
+	spin_unlock(&vhci->lock);
+	return changed ? retval : 0;
+}
+
+static inline void hub_descriptor(struct usb_hub_descriptor *desc)
+{
+	memset(desc, 0, sizeof(*desc));
+	desc->bDescriptorType = 0x29;
+	desc->bDescLength = 9;
+	desc->wHubCharacteristics = (__constant_cpu_to_le16(0x0001));
+	desc->bNbrPorts = VHCI_NPORTS;
+	desc->u.hs.DeviceRemovable[0] = 0xff;
+	desc->u.hs.DeviceRemovable[1] = 0xff;
+}
+
+static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
+			    u16 wIndex, char *buf, u16 wLength)
+{
+	struct vhci_hcd	*dum;
+	int             retval = 0;
+	int		rhport;
+
+	u32 prev_port_status[VHCI_NPORTS];
+
+	if (!HCD_HW_ACCESSIBLE(hcd))
+		return -ETIMEDOUT;
+
+	/*
+	 * NOTE:
+	 * wIndex shows the port number and begins from 1.
+	 */
+	usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue,
+			  wIndex);
+	if (wIndex > VHCI_NPORTS)
+		pr_err("invalid port number %d\n", wIndex);
+	rhport = ((__u8)(wIndex & 0x00ff)) - 1;
+
+	dum = hcd_to_vhci(hcd);
+
+	spin_lock(&dum->lock);
+
+	/* store old status and compare now and old later */
+	if (usbip_dbg_flag_vhci_rh) {
+		memcpy(prev_port_status, dum->port_status,
+			sizeof(prev_port_status));
+	}
+
+	switch (typeReq) {
+	case ClearHubFeature:
+		usbip_dbg_vhci_rh(" ClearHubFeature\n");
+		break;
+	case ClearPortFeature:
+		switch (wValue) {
+		case USB_PORT_FEAT_SUSPEND:
+			if (dum->port_status[rhport] & USB_PORT_STAT_SUSPEND) {
+				/* 20msec signaling */
+				dum->resuming = 1;
+				dum->re_timeout =
+					jiffies + msecs_to_jiffies(20);
+			}
+			break;
+		case USB_PORT_FEAT_POWER:
+			usbip_dbg_vhci_rh(
+				" ClearPortFeature: USB_PORT_FEAT_POWER\n");
+			dum->port_status[rhport] = 0;
+			dum->resuming = 0;
+			break;
+		case USB_PORT_FEAT_C_RESET:
+			usbip_dbg_vhci_rh(
+				" ClearPortFeature: USB_PORT_FEAT_C_RESET\n");
+			switch (dum->vdev[rhport].speed) {
+			case USB_SPEED_HIGH:
+				dum->port_status[rhport] |=
+					USB_PORT_STAT_HIGH_SPEED;
+				break;
+			case USB_SPEED_LOW:
+				dum->port_status[rhport] |=
+					USB_PORT_STAT_LOW_SPEED;
+				break;
+			default:
+				break;
+			}
+		default:
+			usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n",
+					  wValue);
+			dum->port_status[rhport] &= ~(1 << wValue);
+			break;
+		}
+		break;
+	case GetHubDescriptor:
+		usbip_dbg_vhci_rh(" GetHubDescriptor\n");
+		hub_descriptor((struct usb_hub_descriptor *) buf);
+		break;
+	case GetHubStatus:
+		usbip_dbg_vhci_rh(" GetHubStatus\n");
+		*(__le32 *) buf = cpu_to_le32(0);
+		break;
+	case GetPortStatus:
+		usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex);
+		if (wIndex > VHCI_NPORTS || wIndex < 1) {
+			pr_err("invalid port number %d\n", wIndex);
+			retval = -EPIPE;
+		}
+
+		/* we do not care about resume. */
+
+		/* whoever resets or resumes must GetPortStatus to
+		 * complete it!!
+		 */
+		if (dum->resuming && time_after(jiffies, dum->re_timeout)) {
+			dum->port_status[rhport] |=
+				(1 << USB_PORT_FEAT_C_SUSPEND);
+			dum->port_status[rhport] &=
+				~(1 << USB_PORT_FEAT_SUSPEND);
+			dum->resuming = 0;
+			dum->re_timeout = 0;
+		}
+
+		if ((dum->port_status[rhport] & (1 << USB_PORT_FEAT_RESET)) !=
+		    0 && time_after(jiffies, dum->re_timeout)) {
+			dum->port_status[rhport] |=
+				(1 << USB_PORT_FEAT_C_RESET);
+			dum->port_status[rhport] &=
+				~(1 << USB_PORT_FEAT_RESET);
+			dum->re_timeout = 0;
+
+			if (dum->vdev[rhport].ud.status ==
+			    VDEV_ST_NOTASSIGNED) {
+				usbip_dbg_vhci_rh(
+					" enable rhport %d (status %u)\n",
+					rhport,
+					dum->vdev[rhport].ud.status);
+				dum->port_status[rhport] |=
+					USB_PORT_STAT_ENABLE;
+			}
+		}
+		((__le16 *) buf)[0] = cpu_to_le16(dum->port_status[rhport]);
+		((__le16 *) buf)[1] =
+			cpu_to_le16(dum->port_status[rhport] >> 16);
+
+		usbip_dbg_vhci_rh(" GetPortStatus bye %x %x\n", ((u16 *)buf)[0],
+				  ((u16 *)buf)[1]);
+		break;
+	case SetHubFeature:
+		usbip_dbg_vhci_rh(" SetHubFeature\n");
+		retval = -EPIPE;
+		break;
+	case SetPortFeature:
+		switch (wValue) {
+		case USB_PORT_FEAT_SUSPEND:
+			usbip_dbg_vhci_rh(
+				" SetPortFeature: USB_PORT_FEAT_SUSPEND\n");
+			break;
+		case USB_PORT_FEAT_RESET:
+			usbip_dbg_vhci_rh(
+				" SetPortFeature: USB_PORT_FEAT_RESET\n");
+			/* if it's already running, disconnect first */
+			if (dum->port_status[rhport] & USB_PORT_STAT_ENABLE) {
+				dum->port_status[rhport] &=
+					~(USB_PORT_STAT_ENABLE |
+					  USB_PORT_STAT_LOW_SPEED |
+					  USB_PORT_STAT_HIGH_SPEED);
+				/* FIXME test that code path! */
+			}
+			/* 50msec reset signaling */
+			dum->re_timeout = jiffies + msecs_to_jiffies(50);
+
+			/* FALLTHROUGH */
+		default:
+			usbip_dbg_vhci_rh(" SetPortFeature: default %d\n",
+					  wValue);
+			dum->port_status[rhport] |= (1 << wValue);
+			break;
+		}
+		break;
+
+	default:
+		pr_err("default: no such request\n");
+
+		/* "protocol stall" on error */
+		retval = -EPIPE;
+	}
+
+	if (usbip_dbg_flag_vhci_rh) {
+		pr_debug("port %d\n", rhport);
+		/* Only dump valid port status */
+		if (rhport >= 0) {
+			dump_port_status_diff(prev_port_status[rhport],
+					      dum->port_status[rhport]);
+		}
+	}
+	usbip_dbg_vhci_rh(" bye\n");
+
+	spin_unlock(&dum->lock);
+
+	return retval;
+}
+
+static struct vhci_device *get_vdev(struct usb_device *udev)
+{
+	int i;
+
+	if (!udev)
+		return NULL;
+
+	for (i = 0; i < VHCI_NPORTS; i++)
+		if (the_controller->vdev[i].udev == udev)
+			return port_to_vdev(i);
+
+	return NULL;
+}
+
+static void vhci_tx_urb(struct urb *urb)
+{
+	struct vhci_device *vdev = get_vdev(urb->dev);
+	struct vhci_priv *priv;
+
+	if (!vdev) {
+		pr_err("could not get virtual device");
+		return;
+	}
+
+	priv = kzalloc(sizeof(struct vhci_priv), GFP_ATOMIC);
+	if (!priv) {
+		usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
+		return;
+	}
+
+	spin_lock(&vdev->priv_lock);
+
+	priv->seqnum = atomic_inc_return(&the_controller->seqnum);
+	if (priv->seqnum == 0xffff)
+		dev_info(&urb->dev->dev, "seqnum max\n");
+
+	priv->vdev = vdev;
+	priv->urb = urb;
+
+	urb->hcpriv = (void *) priv;
+
+	list_add_tail(&priv->list, &vdev->priv_tx);
+
+	wake_up(&vdev->waitq_tx);
+	spin_unlock(&vdev->priv_lock);
+}
+
+static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
+			    gfp_t mem_flags)
+{
+	struct device *dev = &urb->dev->dev;
+	int ret = 0;
+	struct vhci_device *vdev;
+
+	usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
+			  hcd, urb, mem_flags);
+
+	/* patch to usb_sg_init() is in 2.5.60 */
+	BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length);
+
+	spin_lock(&the_controller->lock);
+
+	if (urb->status != -EINPROGRESS) {
+		dev_err(dev, "URB already unlinked!, status %d\n", urb->status);
+		spin_unlock(&the_controller->lock);
+		return urb->status;
+	}
+
+	vdev = port_to_vdev(urb->dev->portnum-1);
+
+	/* refuse enqueue for dead connection */
+	spin_lock(&vdev->ud.lock);
+	if (vdev->ud.status == VDEV_ST_NULL ||
+	    vdev->ud.status == VDEV_ST_ERROR) {
+		dev_err(dev, "enqueue for inactive port %d\n", vdev->rhport);
+		spin_unlock(&vdev->ud.lock);
+		spin_unlock(&the_controller->lock);
+		return -ENODEV;
+	}
+	spin_unlock(&vdev->ud.lock);
+
+	ret = usb_hcd_link_urb_to_ep(hcd, urb);
+	if (ret)
+		goto no_need_unlink;
+
+	/*
+	 * The enumeration process is as follows;
+	 *
+	 *  1. Get_Descriptor request to DevAddrs(0) EndPoint(0)
+	 *     to get max packet length of default pipe
+	 *
+	 *  2. Set_Address request to DevAddr(0) EndPoint(0)
+	 *
+	 */
+	if (usb_pipedevice(urb->pipe) == 0) {
+		__u8 type = usb_pipetype(urb->pipe);
+		struct usb_ctrlrequest *ctrlreq =
+			(struct usb_ctrlrequest *) urb->setup_packet;
+
+		if (type != PIPE_CONTROL || !ctrlreq) {
+			dev_err(dev, "invalid request to devnum 0\n");
+			ret = -EINVAL;
+			goto no_need_xmit;
+		}
+
+		switch (ctrlreq->bRequest) {
+		case USB_REQ_SET_ADDRESS:
+			/* set_address may come when a device is reset */
+			dev_info(dev, "SetAddress Request (%d) to port %d\n",
+				 ctrlreq->wValue, vdev->rhport);
+
+			if (vdev->udev)
+				usb_put_dev(vdev->udev);
+			vdev->udev = usb_get_dev(urb->dev);
+
+			spin_lock(&vdev->ud.lock);
+			vdev->ud.status = VDEV_ST_USED;
+			spin_unlock(&vdev->ud.lock);
+
+			if (urb->status == -EINPROGRESS) {
+				/* This request is successfully completed. */
+				/* If not -EINPROGRESS, possibly unlinked. */
+				urb->status = 0;
+			}
+
+			goto no_need_xmit;
+
+		case USB_REQ_GET_DESCRIPTOR:
+			if (ctrlreq->wValue == cpu_to_le16(USB_DT_DEVICE << 8))
+				usbip_dbg_vhci_hc(
+					"Not yet?:Get_Descriptor to device 0 (get max pipe size)\n");
+
+			if (vdev->udev)
+				usb_put_dev(vdev->udev);
+			vdev->udev = usb_get_dev(urb->dev);
+			goto out;
+
+		default:
+			/* NOT REACHED */
+			dev_err(dev,
+				"invalid request to devnum 0 bRequest %u, wValue %u\n",
+				ctrlreq->bRequest,
+				ctrlreq->wValue);
+			ret =  -EINVAL;
+			goto no_need_xmit;
+		}
+
+	}
+
+out:
+	vhci_tx_urb(urb);
+	spin_unlock(&the_controller->lock);
+
+	return 0;
+
+no_need_xmit:
+	usb_hcd_unlink_urb_from_ep(hcd, urb);
+no_need_unlink:
+	spin_unlock(&the_controller->lock);
+	usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
+	return ret;
+}
+
+/*
+ * vhci_rx gives back the urb after receiving the reply of the urb.  If an
+ * unlink pdu is sent or not, vhci_rx receives a normal return pdu and gives
+ * back its urb. For the driver unlinking the urb, the content of the urb is
+ * not important, but the calling to its completion handler is important; the
+ * completion of unlinking is notified by the completion handler.
+ *
+ *
+ * CLIENT SIDE
+ *
+ * - When vhci_hcd receives RET_SUBMIT,
+ *
+ *	- case 1a). the urb of the pdu is not unlinking.
+ *		- normal case
+ *		=> just give back the urb
+ *
+ *	- case 1b). the urb of the pdu is unlinking.
+ *		- usbip.ko will return a reply of the unlinking request.
+ *		=> give back the urb now and go to case 2b).
+ *
+ * - When vhci_hcd receives RET_UNLINK,
+ *
+ *	- case 2a). a submit request is still pending in vhci_hcd.
+ *		- urb was really pending in usbip.ko and urb_unlink_urb() was
+ *		  completed there.
+ *		=> free a pending submit request
+ *		=> notify unlink completeness by giving back the urb
+ *
+ *	- case 2b). a submit request is *not* pending in vhci_hcd.
+ *		- urb was already given back to the core driver.
+ *		=> do not give back the urb
+ *
+ *
+ * SERVER SIDE
+ *
+ * - When usbip receives CMD_UNLINK,
+ *
+ *	- case 3a). the urb of the unlink request is now in submission.
+ *		=> do usb_unlink_urb().
+ *		=> after the unlink is completed, send RET_UNLINK.
+ *
+ *	- case 3b). the urb of the unlink request is not in submission.
+ *		- may be already completed or never be received
+ *		=> send RET_UNLINK
+ *
+ */
+static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+{
+	struct vhci_priv *priv;
+	struct vhci_device *vdev;
+
+	pr_info("dequeue a urb %p\n", urb);
+
+	spin_lock(&the_controller->lock);
+
+	priv = urb->hcpriv;
+	if (!priv) {
+		/* URB was never linked! or will be soon given back by
+		 * vhci_rx. */
+		spin_unlock(&the_controller->lock);
+		return 0;
+	}
+
+	{
+		int ret = 0;
+
+		ret = usb_hcd_check_unlink_urb(hcd, urb, status);
+		if (ret) {
+			spin_unlock(&the_controller->lock);
+			return ret;
+		}
+	}
+
+	 /* send unlink request here? */
+	vdev = priv->vdev;
+
+	if (!vdev->ud.tcp_socket) {
+		/* tcp connection is closed */
+		spin_lock(&vdev->priv_lock);
+
+		pr_info("device %p seems to be disconnected\n", vdev);
+		list_del(&priv->list);
+		kfree(priv);
+		urb->hcpriv = NULL;
+
+		spin_unlock(&vdev->priv_lock);
+
+		/*
+		 * If tcp connection is alive, we have sent CMD_UNLINK.
+		 * vhci_rx will receive RET_UNLINK and give back the URB.
+		 * Otherwise, we give back it here.
+		 */
+		pr_info("gives back urb %p\n", urb);
+
+		usb_hcd_unlink_urb_from_ep(hcd, urb);
+
+		spin_unlock(&the_controller->lock);
+		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
+				     urb->status);
+		spin_lock(&the_controller->lock);
+
+	} else {
+		/* tcp connection is alive */
+		struct vhci_unlink *unlink;
+
+		spin_lock(&vdev->priv_lock);
+
+		/* setup CMD_UNLINK pdu */
+		unlink = kzalloc(sizeof(struct vhci_unlink), GFP_ATOMIC);
+		if (!unlink) {
+			spin_unlock(&vdev->priv_lock);
+			spin_unlock(&the_controller->lock);
+			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
+			return -ENOMEM;
+		}
+
+		unlink->seqnum = atomic_inc_return(&the_controller->seqnum);
+		if (unlink->seqnum == 0xffff)
+			pr_info("seqnum max\n");
+
+		unlink->unlink_seqnum = priv->seqnum;
+
+		pr_info("device %p seems to be still connected\n", vdev);
+
+		/* send cmd_unlink and try to cancel the pending URB in the
+		 * peer */
+		list_add_tail(&unlink->list, &vdev->unlink_tx);
+		wake_up(&vdev->waitq_tx);
+
+		spin_unlock(&vdev->priv_lock);
+	}
+
+	spin_unlock(&the_controller->lock);
+
+	usbip_dbg_vhci_hc("leave\n");
+	return 0;
+}
+
+static void vhci_device_unlink_cleanup(struct vhci_device *vdev)
+{
+	struct vhci_unlink *unlink, *tmp;
+
+	spin_lock(&the_controller->lock);
+	spin_lock(&vdev->priv_lock);
+
+	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
+		pr_info("unlink cleanup tx %lu\n", unlink->unlink_seqnum);
+		list_del(&unlink->list);
+		kfree(unlink);
+	}
+
+	while (!list_empty(&vdev->unlink_rx)) {
+		struct urb *urb;
+
+		unlink = list_first_entry(&vdev->unlink_rx, struct vhci_unlink,
+			list);
+
+		/* give back URB of unanswered unlink request */
+		pr_info("unlink cleanup rx %lu\n", unlink->unlink_seqnum);
+
+		urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum);
+		if (!urb) {
+			pr_info("the urb (seqnum %lu) was already given back\n",
+				unlink->unlink_seqnum);
+			list_del(&unlink->list);
+			kfree(unlink);
+			continue;
+		}
+
+		urb->status = -ENODEV;
+
+		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
+
+		list_del(&unlink->list);
+
+		spin_unlock(&vdev->priv_lock);
+		spin_unlock(&the_controller->lock);
+
+		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
+				     urb->status);
+
+		spin_lock(&the_controller->lock);
+		spin_lock(&vdev->priv_lock);
+
+		kfree(unlink);
+	}
+
+	spin_unlock(&vdev->priv_lock);
+	spin_unlock(&the_controller->lock);
+}
+
+/*
+ * The important thing is that only one context begins cleanup.
+ * This is why error handling and cleanup become simple.
+ * We do not want to consider race condition as possible.
+ */
+static void vhci_shutdown_connection(struct usbip_device *ud)
+{
+	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
+
+	/* need this? see stub_dev.c */
+	if (ud->tcp_socket) {
+		pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
+	}
+
+	/* kill threads related to this sdev */
+	if (vdev->ud.tcp_rx) {
+		kthread_stop_put(vdev->ud.tcp_rx);
+		vdev->ud.tcp_rx = NULL;
+	}
+	if (vdev->ud.tcp_tx) {
+		kthread_stop_put(vdev->ud.tcp_tx);
+		vdev->ud.tcp_tx = NULL;
+	}
+	pr_info("stop threads\n");
+
+	/* active connection is closed */
+	if (vdev->ud.tcp_socket) {
+		sockfd_put(vdev->ud.tcp_socket);
+		vdev->ud.tcp_socket = NULL;
+	}
+	pr_info("release socket\n");
+
+	vhci_device_unlink_cleanup(vdev);
+
+	/*
+	 * rh_port_disconnect() is a trigger of ...
+	 *   usb_disable_device():
+	 *	disable all the endpoints for a USB device.
+	 *   usb_disable_endpoint():
+	 *	disable endpoints. pending urbs are unlinked(dequeued).
+	 *
+	 * NOTE: After calling rh_port_disconnect(), the USB device drivers of a
+	 * detached device should release used urbs in a cleanup function (i.e.
+	 * xxx_disconnect()). Therefore, vhci_hcd does not need to release
+	 * pushed urbs and their private data in this function.
+	 *
+	 * NOTE: vhci_dequeue() must be considered carefully. When shutting down
+	 * a connection, vhci_shutdown_connection() expects vhci_dequeue()
+	 * gives back pushed urbs and frees their private data by request of
+	 * the cleanup function of a USB driver. When unlinking a urb with an
+	 * active connection, vhci_dequeue() does not give back the urb which
+	 * is actually given back by vhci_rx after receiving its return pdu.
+	 *
+	 */
+	rh_port_disconnect(vdev->rhport);
+
+	pr_info("disconnect device\n");
+}
+
+
+static void vhci_device_reset(struct usbip_device *ud)
+{
+	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
+
+	spin_lock(&ud->lock);
+
+	vdev->speed  = 0;
+	vdev->devid  = 0;
+
+	if (vdev->udev)
+		usb_put_dev(vdev->udev);
+	vdev->udev = NULL;
+
+	if (ud->tcp_socket) {
+		sockfd_put(ud->tcp_socket);
+		ud->tcp_socket = NULL;
+	}
+	ud->status = VDEV_ST_NULL;
+
+	spin_unlock(&ud->lock);
+}
+
+static void vhci_device_unusable(struct usbip_device *ud)
+{
+	spin_lock(&ud->lock);
+	ud->status = VDEV_ST_ERROR;
+	spin_unlock(&ud->lock);
+}
+
+static void vhci_device_init(struct vhci_device *vdev)
+{
+	memset(vdev, 0, sizeof(*vdev));
+
+	vdev->ud.side   = USBIP_VHCI;
+	vdev->ud.status = VDEV_ST_NULL;
+	spin_lock_init(&vdev->ud.lock);
+
+	INIT_LIST_HEAD(&vdev->priv_rx);
+	INIT_LIST_HEAD(&vdev->priv_tx);
+	INIT_LIST_HEAD(&vdev->unlink_tx);
+	INIT_LIST_HEAD(&vdev->unlink_rx);
+	spin_lock_init(&vdev->priv_lock);
+
+	init_waitqueue_head(&vdev->waitq_tx);
+
+	vdev->ud.eh_ops.shutdown = vhci_shutdown_connection;
+	vdev->ud.eh_ops.reset = vhci_device_reset;
+	vdev->ud.eh_ops.unusable = vhci_device_unusable;
+
+	usbip_start_eh(&vdev->ud);
+}
+
+static int vhci_start(struct usb_hcd *hcd)
+{
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
+	int rhport;
+	int err = 0;
+
+	usbip_dbg_vhci_hc("enter vhci_start\n");
+
+	/* initialize private data of usb_hcd */
+
+	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+		struct vhci_device *vdev = &vhci->vdev[rhport];
+
+		vhci_device_init(vdev);
+		vdev->rhport = rhport;
+	}
+
+	atomic_set(&vhci->seqnum, 0);
+	spin_lock_init(&vhci->lock);
+
+	hcd->power_budget = 0; /* no limit */
+	hcd->uses_new_polling = 1;
+
+	/* vhci_hcd is now ready to be controlled through sysfs */
+	err = sysfs_create_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
+	if (err) {
+		pr_err("create sysfs files\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void vhci_stop(struct usb_hcd *hcd)
+{
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
+	int rhport = 0;
+
+	usbip_dbg_vhci_hc("stop VHCI controller\n");
+
+	/* 1. remove the userland interface of vhci_hcd */
+	sysfs_remove_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
+
+	/* 2. shutdown all the ports of vhci_hcd */
+	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+		struct vhci_device *vdev = &vhci->vdev[rhport];
+
+		usbip_event_add(&vdev->ud, VDEV_EVENT_REMOVED);
+		usbip_stop_eh(&vdev->ud);
+	}
+}
+
+static int vhci_get_frame_number(struct usb_hcd *hcd)
+{
+	pr_err("Not yet implemented\n");
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+/* FIXME: suspend/resume */
+static int vhci_bus_suspend(struct usb_hcd *hcd)
+{
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
+
+	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
+
+	spin_lock(&vhci->lock);
+	hcd->state = HC_STATE_SUSPENDED;
+	spin_unlock(&vhci->lock);
+
+	return 0;
+}
+
+static int vhci_bus_resume(struct usb_hcd *hcd)
+{
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
+	int rc = 0;
+
+	dev_dbg(&hcd->self.root_hub->dev, "%s\n", __func__);
+
+	spin_lock(&vhci->lock);
+	if (!HCD_HW_ACCESSIBLE(hcd))
+		rc = -ESHUTDOWN;
+	else
+		hcd->state = HC_STATE_RUNNING;
+	spin_unlock(&vhci->lock);
+
+	return rc;
+}
+
+#else
+
+#define vhci_bus_suspend      NULL
+#define vhci_bus_resume       NULL
+#endif
+
+static struct hc_driver vhci_hc_driver = {
+	.description	= driver_name,
+	.product_desc	= driver_desc,
+	.hcd_priv_size	= sizeof(struct vhci_hcd),
+
+	.flags		= HCD_USB2,
+
+	.start		= vhci_start,
+	.stop		= vhci_stop,
+
+	.urb_enqueue	= vhci_urb_enqueue,
+	.urb_dequeue	= vhci_urb_dequeue,
+
+	.get_frame_number = vhci_get_frame_number,
+
+	.hub_status_data = vhci_hub_status,
+	.hub_control    = vhci_hub_control,
+	.bus_suspend	= vhci_bus_suspend,
+	.bus_resume	= vhci_bus_resume,
+};
+
+static int vhci_hcd_probe(struct platform_device *pdev)
+{
+	struct usb_hcd		*hcd;
+	int			ret;
+
+	usbip_dbg_vhci_hc("name %s id %d\n", pdev->name, pdev->id);
+
+	/*
+	 * Allocate and initialize hcd.
+	 * Our private data is also allocated automatically.
+	 */
+	hcd = usb_create_hcd(&vhci_hc_driver, &pdev->dev, dev_name(&pdev->dev));
+	if (!hcd) {
+		pr_err("create hcd failed\n");
+		return -ENOMEM;
+	}
+	hcd->has_tt = 1;
+
+	/* this is private data for vhci_hcd */
+	the_controller = hcd_to_vhci(hcd);
+
+	/*
+	 * Finish generic HCD structure initialization and register.
+	 * Call the driver's reset() and start() routines.
+	 */
+	ret = usb_add_hcd(hcd, 0, 0);
+	if (ret != 0) {
+		pr_err("usb_add_hcd failed %d\n", ret);
+		usb_put_hcd(hcd);
+		the_controller = NULL;
+		return ret;
+	}
+
+	usbip_dbg_vhci_hc("bye\n");
+	return 0;
+}
+
+static int vhci_hcd_remove(struct platform_device *pdev)
+{
+	struct usb_hcd	*hcd;
+
+	hcd = platform_get_drvdata(pdev);
+	if (!hcd)
+		return 0;
+
+	/*
+	 * Disconnects the root hub,
+	 * then reverses the effects of usb_add_hcd(),
+	 * invoking the HCD's stop() methods.
+	 */
+	usb_remove_hcd(hcd);
+	usb_put_hcd(hcd);
+	the_controller = NULL;
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+/* what should happen for USB/IP under suspend/resume? */
+static int vhci_hcd_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct usb_hcd *hcd;
+	int rhport = 0;
+	int connected = 0;
+	int ret = 0;
+
+	hcd = platform_get_drvdata(pdev);
+
+	spin_lock(&the_controller->lock);
+
+	for (rhport = 0; rhport < VHCI_NPORTS; rhport++)
+		if (the_controller->port_status[rhport] &
+		    USB_PORT_STAT_CONNECTION)
+			connected += 1;
+
+	spin_unlock(&the_controller->lock);
+
+	if (connected > 0) {
+		dev_info(&pdev->dev,
+			 "We have %d active connection%s. Do not suspend.\n",
+			 connected, (connected == 1 ? "" : "s"));
+		ret =  -EBUSY;
+	} else {
+		dev_info(&pdev->dev, "suspend vhci_hcd");
+		clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
+	}
+
+	return ret;
+}
+
+static int vhci_hcd_resume(struct platform_device *pdev)
+{
+	struct usb_hcd *hcd;
+
+	dev_dbg(&pdev->dev, "%s\n", __func__);
+
+	hcd = platform_get_drvdata(pdev);
+	set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
+	usb_hcd_poll_rh_status(hcd);
+
+	return 0;
+}
+
+#else
+
+#define vhci_hcd_suspend	NULL
+#define vhci_hcd_resume		NULL
+
+#endif
+
+static struct platform_driver vhci_driver = {
+	.probe	= vhci_hcd_probe,
+	.remove	= vhci_hcd_remove,
+	.suspend = vhci_hcd_suspend,
+	.resume	= vhci_hcd_resume,
+	.driver	= {
+		.name = driver_name,
+		.owner = THIS_MODULE,
+	},
+};
+
+/*
+ * The VHCI 'device' is 'virtual'; not a real plug&play hardware.
+ * We need to add this virtual device as a platform device arbitrarily:
+ *	1. platform_device_register()
+ */
+static void the_pdev_release(struct device *dev)
+{
+}
+
+static struct platform_device the_pdev = {
+	/* should be the same name as driver_name */
+	.name = driver_name,
+	.id = -1,
+	.dev = {
+		.release = the_pdev_release,
+	},
+};
+
+static int __init vhci_hcd_init(void)
+{
+	int ret;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	ret = platform_driver_register(&vhci_driver);
+	if (ret)
+		goto err_driver_register;
+
+	ret = platform_device_register(&the_pdev);
+	if (ret)
+		goto err_platform_device_register;
+
+	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
+	return ret;
+
+err_platform_device_register:
+	platform_driver_unregister(&vhci_driver);
+err_driver_register:
+	return ret;
+}
+
+static void __exit vhci_hcd_exit(void)
+{
+	platform_device_unregister(&the_pdev);
+	platform_driver_unregister(&vhci_driver);
+}
+
+module_init(vhci_hcd_init);
+module_exit(vhci_hcd_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(USBIP_VERSION);
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
new file mode 100644
index 000000000000..00e4a54308e4
--- /dev/null
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "usbip_common.h"
+#include "vhci.h"
+
+/* get URB from transmitted urb queue. caller must hold vdev->priv_lock */
+struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
+{
+	struct vhci_priv *priv, *tmp;
+	struct urb *urb = NULL;
+	int status;
+
+	list_for_each_entry_safe(priv, tmp, &vdev->priv_rx, list) {
+		if (priv->seqnum != seqnum)
+			continue;
+
+		urb = priv->urb;
+		status = urb->status;
+
+		usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
+				urb, priv, seqnum);
+
+		switch (status) {
+		case -ENOENT:
+			/* fall through */
+		case -ECONNRESET:
+			dev_info(&urb->dev->dev,
+				 "urb %p was unlinked %ssynchronuously.\n", urb,
+				 status == -ENOENT ? "" : "a");
+			break;
+		case -EINPROGRESS:
+			/* no info output */
+			break;
+		default:
+			dev_info(&urb->dev->dev,
+				 "urb %p may be in a error, status %d\n", urb,
+				 status);
+		}
+
+		list_del(&priv->list);
+		kfree(priv);
+		urb->hcpriv = NULL;
+
+		break;
+	}
+
+	return urb;
+}
+
+static void vhci_recv_ret_submit(struct vhci_device *vdev,
+				 struct usbip_header *pdu)
+{
+	struct usbip_device *ud = &vdev->ud;
+	struct urb *urb;
+
+	spin_lock(&vdev->priv_lock);
+	urb = pickup_urb_and_free_priv(vdev, pdu->base.seqnum);
+	spin_unlock(&vdev->priv_lock);
+
+	if (!urb) {
+		pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
+		pr_info("max seqnum %d\n",
+			atomic_read(&the_controller->seqnum));
+		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+		return;
+	}
+
+	/* unpack the pdu to a urb */
+	usbip_pack_pdu(pdu, urb, USBIP_RET_SUBMIT, 0);
+
+	/* recv transfer buffer */
+	if (usbip_recv_xbuff(ud, urb) < 0)
+		return;
+
+	/* recv iso_packet_descriptor */
+	if (usbip_recv_iso(ud, urb) < 0)
+		return;
+
+	/* restore the padding in iso packets */
+	usbip_pad_iso(ud, urb);
+
+	if (usbip_dbg_flag_vhci_rx)
+		usbip_dump_urb(urb);
+
+	usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+
+	spin_lock(&the_controller->lock);
+	usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
+	spin_unlock(&the_controller->lock);
+
+	usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
+
+	usbip_dbg_vhci_rx("Leave\n");
+}
+
+static struct vhci_unlink *dequeue_pending_unlink(struct vhci_device *vdev,
+						  struct usbip_header *pdu)
+{
+	struct vhci_unlink *unlink, *tmp;
+
+	spin_lock(&vdev->priv_lock);
+
+	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_rx, list) {
+		pr_info("unlink->seqnum %lu\n", unlink->seqnum);
+		if (unlink->seqnum == pdu->base.seqnum) {
+			usbip_dbg_vhci_rx("found pending unlink, %lu\n",
+					  unlink->seqnum);
+			list_del(&unlink->list);
+
+			spin_unlock(&vdev->priv_lock);
+			return unlink;
+		}
+	}
+
+	spin_unlock(&vdev->priv_lock);
+
+	return NULL;
+}
+
+static void vhci_recv_ret_unlink(struct vhci_device *vdev,
+				 struct usbip_header *pdu)
+{
+	struct vhci_unlink *unlink;
+	struct urb *urb;
+
+	usbip_dump_header(pdu);
+
+	unlink = dequeue_pending_unlink(vdev, pdu);
+	if (!unlink) {
+		pr_info("cannot find the pending unlink %u\n",
+			pdu->base.seqnum);
+		return;
+	}
+
+	spin_lock(&vdev->priv_lock);
+	urb = pickup_urb_and_free_priv(vdev, unlink->unlink_seqnum);
+	spin_unlock(&vdev->priv_lock);
+
+	if (!urb) {
+		/*
+		 * I get the result of a unlink request. But, it seems that I
+		 * already received the result of its submit result and gave
+		 * back the URB.
+		 */
+		pr_info("the urb (seqnum %d) was already given back\n",
+			pdu->base.seqnum);
+	} else {
+		usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+
+		/* If unlink is successful, status is -ECONNRESET */
+		urb->status = pdu->u.ret_unlink.status;
+		pr_info("urb->status %d\n", urb->status);
+
+		spin_lock(&the_controller->lock);
+		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
+		spin_unlock(&the_controller->lock);
+
+		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
+				     urb->status);
+	}
+
+	kfree(unlink);
+}
+
+static int vhci_priv_tx_empty(struct vhci_device *vdev)
+{
+	int empty = 0;
+
+	spin_lock(&vdev->priv_lock);
+	empty = list_empty(&vdev->priv_rx);
+	spin_unlock(&vdev->priv_lock);
+
+	return empty;
+}
+
+/* recv a pdu */
+static void vhci_rx_pdu(struct usbip_device *ud)
+{
+	int ret;
+	struct usbip_header pdu;
+	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
+
+	usbip_dbg_vhci_rx("Enter\n");
+
+	memset(&pdu, 0, sizeof(pdu));
+
+	/* receive a pdu header */
+	ret = usbip_recv(ud->tcp_socket, &pdu, sizeof(pdu));
+	if (ret < 0) {
+		if (ret == -ECONNRESET)
+			pr_info("connection reset by peer\n");
+		else if (ret == -EAGAIN) {
+			/* ignore if connection was idle */
+			if (vhci_priv_tx_empty(vdev))
+				return;
+			pr_info("connection timed out with pending urbs\n");
+		} else if (ret != -ERESTARTSYS)
+			pr_info("xmit failed %d\n", ret);
+
+		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+		return;
+	}
+	if (ret == 0) {
+		pr_info("connection closed");
+		usbip_event_add(ud, VDEV_EVENT_DOWN);
+		return;
+	}
+	if (ret != sizeof(pdu)) {
+		pr_err("received pdu size is %d, should be %d\n", ret,
+		       (unsigned int)sizeof(pdu));
+		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+		return;
+	}
+
+	usbip_header_correct_endian(&pdu, 0);
+
+	if (usbip_dbg_flag_vhci_rx)
+		usbip_dump_header(&pdu);
+
+	switch (pdu.base.command) {
+	case USBIP_RET_SUBMIT:
+		vhci_recv_ret_submit(vdev, &pdu);
+		break;
+	case USBIP_RET_UNLINK:
+		vhci_recv_ret_unlink(vdev, &pdu);
+		break;
+	default:
+		/* NOT REACHED */
+		pr_err("unknown pdu %u\n", pdu.base.command);
+		usbip_dump_header(&pdu);
+		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+		break;
+	}
+}
+
+int vhci_rx_loop(void *data)
+{
+	struct usbip_device *ud = data;
+
+	while (!kthread_should_stop()) {
+		if (usbip_event_happened(ud))
+			break;
+
+		vhci_rx_pdu(ud);
+	}
+
+	return 0;
+}
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
new file mode 100644
index 000000000000..211f43f67ea2
--- /dev/null
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/file.h>
+#include <linux/net.h>
+
+#include "usbip_common.h"
+#include "vhci.h"
+
+/* TODO: refine locking ?*/
+
+/* Sysfs entry to show port status */
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+			   char *out)
+{
+	char *s = out;
+	int i = 0;
+
+	BUG_ON(!the_controller || !out);
+
+	spin_lock(&the_controller->lock);
+
+	/*
+	 * output example:
+	 * prt sta spd dev socket           local_busid
+	 * 000 004 000 000         c5a7bb80 1-2.3
+	 * 001 004 000 000         d8cee980 2-3.4
+	 *
+	 * IP address can be retrieved from a socket pointer address by looking
+	 * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
+	 * port number and its peer IP address.
+	 */
+	out += sprintf(out,
+		       "prt sta spd bus dev socket           local_busid\n");
+
+	for (i = 0; i < VHCI_NPORTS; i++) {
+		struct vhci_device *vdev = port_to_vdev(i);
+
+		spin_lock(&vdev->ud.lock);
+		out += sprintf(out, "%03u %03u ", i, vdev->ud.status);
+
+		if (vdev->ud.status == VDEV_ST_USED) {
+			out += sprintf(out, "%03u %08x ",
+				       vdev->speed, vdev->devid);
+			out += sprintf(out, "%16p ", vdev->ud.tcp_socket);
+			out += sprintf(out, "%s", dev_name(&vdev->udev->dev));
+
+		} else {
+			out += sprintf(out, "000 000 000 0000000000000000 0-0");
+		}
+
+		out += sprintf(out, "\n");
+		spin_unlock(&vdev->ud.lock);
+	}
+
+	spin_unlock(&the_controller->lock);
+
+	return out - s;
+}
+static DEVICE_ATTR_RO(status);
+
+/* Sysfs entry to shutdown a virtual connection */
+static int vhci_port_disconnect(__u32 rhport)
+{
+	struct vhci_device *vdev;
+
+	usbip_dbg_vhci_sysfs("enter\n");
+
+	/* lock */
+	spin_lock(&the_controller->lock);
+
+	vdev = port_to_vdev(rhport);
+
+	spin_lock(&vdev->ud.lock);
+	if (vdev->ud.status == VDEV_ST_NULL) {
+		pr_err("not connected %d\n", vdev->ud.status);
+
+		/* unlock */
+		spin_unlock(&vdev->ud.lock);
+		spin_unlock(&the_controller->lock);
+
+		return -EINVAL;
+	}
+
+	/* unlock */
+	spin_unlock(&vdev->ud.lock);
+	spin_unlock(&the_controller->lock);
+
+	usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN);
+
+	return 0;
+}
+
+static ssize_t store_detach(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	int err;
+	__u32 rhport = 0;
+
+	if (sscanf(buf, "%u", &rhport) != 1)
+		return -EINVAL;
+
+	/* check rhport */
+	if (rhport >= VHCI_NPORTS) {
+		dev_err(dev, "invalid port %u\n", rhport);
+		return -EINVAL;
+	}
+
+	err = vhci_port_disconnect(rhport);
+	if (err < 0)
+		return -EINVAL;
+
+	usbip_dbg_vhci_sysfs("Leave\n");
+
+	return count;
+}
+static DEVICE_ATTR(detach, S_IWUSR, NULL, store_detach);
+
+/* Sysfs entry to establish a virtual connection */
+static int valid_args(__u32 rhport, enum usb_device_speed speed)
+{
+	/* check rhport */
+	if (rhport >= VHCI_NPORTS) {
+		pr_err("port %u\n", rhport);
+		return -EINVAL;
+	}
+
+	/* check speed */
+	switch (speed) {
+	case USB_SPEED_LOW:
+	case USB_SPEED_FULL:
+	case USB_SPEED_HIGH:
+	case USB_SPEED_WIRELESS:
+		break;
+	default:
+		pr_err("Failed attach request for unsupported USB speed: %s\n",
+			usb_speed_string(speed));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * To start a new USB/IP attachment, a userland program needs to setup a TCP
+ * connection and then write its socket descriptor with remote device
+ * information into this sysfs file.
+ *
+ * A remote device is virtually attached to the root-hub port of @rhport with
+ * @speed. @devid is embedded into a request to specify the remote device in a
+ * server host.
+ *
+ * write() returns 0 on success, else negative errno.
+ */
+static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct vhci_device *vdev;
+	struct socket *socket;
+	int sockfd = 0;
+	__u32 rhport = 0, devid = 0, speed = 0;
+	int err;
+
+	/*
+	 * @rhport: port number of vhci_hcd
+	 * @sockfd: socket descriptor of an established TCP connection
+	 * @devid: unique device identifier in a remote host
+	 * @speed: usb device speed in a remote host
+	 */
+	if (sscanf(buf, "%u %u %u %u", &rhport, &sockfd, &devid, &speed) != 4)
+		return -EINVAL;
+
+	usbip_dbg_vhci_sysfs("rhport(%u) sockfd(%u) devid(%u) speed(%u)\n",
+			     rhport, sockfd, devid, speed);
+
+	/* check received parameters */
+	if (valid_args(rhport, speed) < 0)
+		return -EINVAL;
+
+	/* Extract socket from fd. */
+	socket = sockfd_lookup(sockfd, &err);
+	if (!socket)
+		return -EINVAL;
+
+	/* now need lock until setting vdev status as used */
+
+	/* begin a lock */
+	spin_lock(&the_controller->lock);
+	vdev = port_to_vdev(rhport);
+	spin_lock(&vdev->ud.lock);
+
+	if (vdev->ud.status != VDEV_ST_NULL) {
+		/* end of the lock */
+		spin_unlock(&vdev->ud.lock);
+		spin_unlock(&the_controller->lock);
+
+		sockfd_put(socket);
+
+		dev_err(dev, "port %d already used\n", rhport);
+		return -EINVAL;
+	}
+
+	dev_info(dev,
+		 "rhport(%u) sockfd(%d) devid(%u) speed(%u) speed_str(%s)\n",
+		 rhport, sockfd, devid, speed, usb_speed_string(speed));
+
+	vdev->devid         = devid;
+	vdev->speed         = speed;
+	vdev->ud.tcp_socket = socket;
+	vdev->ud.status     = VDEV_ST_NOTASSIGNED;
+
+	spin_unlock(&vdev->ud.lock);
+	spin_unlock(&the_controller->lock);
+	/* end the lock */
+
+	vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
+	vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx");
+
+	rh_port_connect(rhport, speed);
+
+	return count;
+}
+static DEVICE_ATTR(attach, S_IWUSR, NULL, store_attach);
+
+static struct attribute *dev_attrs[] = {
+	&dev_attr_status.attr,
+	&dev_attr_detach.attr,
+	&dev_attr_attach.attr,
+	&dev_attr_usbip_debug.attr,
+	NULL,
+};
+
+const struct attribute_group dev_attr_group = {
+	.attrs = dev_attrs,
+};
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
new file mode 100644
index 000000000000..409fd99f3257
--- /dev/null
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "usbip_common.h"
+#include "vhci.h"
+
+static void setup_cmd_submit_pdu(struct usbip_header *pdup,  struct urb *urb)
+{
+	struct vhci_priv *priv = ((struct vhci_priv *)urb->hcpriv);
+	struct vhci_device *vdev = priv->vdev;
+
+	usbip_dbg_vhci_tx("URB, local devnum %u, remote devid %u\n",
+			  usb_pipedevice(urb->pipe), vdev->devid);
+
+	pdup->base.command   = USBIP_CMD_SUBMIT;
+	pdup->base.seqnum    = priv->seqnum;
+	pdup->base.devid     = vdev->devid;
+	pdup->base.direction = usb_pipein(urb->pipe) ?
+		USBIP_DIR_IN : USBIP_DIR_OUT;
+	pdup->base.ep	     = usb_pipeendpoint(urb->pipe);
+
+	usbip_pack_pdu(pdup, urb, USBIP_CMD_SUBMIT, 1);
+
+	if (urb->setup_packet)
+		memcpy(pdup->u.cmd_submit.setup, urb->setup_packet, 8);
+}
+
+static struct vhci_priv *dequeue_from_priv_tx(struct vhci_device *vdev)
+{
+	struct vhci_priv *priv, *tmp;
+
+	spin_lock(&vdev->priv_lock);
+
+	list_for_each_entry_safe(priv, tmp, &vdev->priv_tx, list) {
+		list_move_tail(&priv->list, &vdev->priv_rx);
+		spin_unlock(&vdev->priv_lock);
+		return priv;
+	}
+
+	spin_unlock(&vdev->priv_lock);
+
+	return NULL;
+}
+
+static int vhci_send_cmd_submit(struct vhci_device *vdev)
+{
+	struct vhci_priv *priv = NULL;
+
+	struct msghdr msg;
+	struct kvec iov[3];
+	size_t txsize;
+
+	size_t total_size = 0;
+
+	while ((priv = dequeue_from_priv_tx(vdev)) != NULL) {
+		int ret;
+		struct urb *urb = priv->urb;
+		struct usbip_header pdu_header;
+		struct usbip_iso_packet_descriptor *iso_buffer = NULL;
+
+		txsize = 0;
+		memset(&pdu_header, 0, sizeof(pdu_header));
+		memset(&msg, 0, sizeof(msg));
+		memset(&iov, 0, sizeof(iov));
+
+		usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+
+		/* 1. setup usbip_header */
+		setup_cmd_submit_pdu(&pdu_header, urb);
+		usbip_header_correct_endian(&pdu_header, 1);
+
+		iov[0].iov_base = &pdu_header;
+		iov[0].iov_len  = sizeof(pdu_header);
+		txsize += sizeof(pdu_header);
+
+		/* 2. setup transfer buffer */
+		if (!usb_pipein(urb->pipe) && urb->transfer_buffer_length > 0) {
+			iov[1].iov_base = urb->transfer_buffer;
+			iov[1].iov_len  = urb->transfer_buffer_length;
+			txsize += urb->transfer_buffer_length;
+		}
+
+		/* 3. setup iso_packet_descriptor */
+		if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+			ssize_t len = 0;
+
+			iso_buffer = usbip_alloc_iso_desc_pdu(urb, &len);
+			if (!iso_buffer) {
+				usbip_event_add(&vdev->ud,
+						SDEV_EVENT_ERROR_MALLOC);
+				return -1;
+			}
+
+			iov[2].iov_base = iso_buffer;
+			iov[2].iov_len  = len;
+			txsize += len;
+		}
+
+		ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 3, txsize);
+		if (ret != txsize) {
+			pr_err("sendmsg failed!, ret=%d for %zd\n", ret,
+			       txsize);
+			kfree(iso_buffer);
+			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP);
+			return -1;
+		}
+
+		kfree(iso_buffer);
+		usbip_dbg_vhci_tx("send txdata\n");
+
+		total_size += txsize;
+	}
+
+	return total_size;
+}
+
+static struct vhci_unlink *dequeue_from_unlink_tx(struct vhci_device *vdev)
+{
+	struct vhci_unlink *unlink, *tmp;
+
+	spin_lock(&vdev->priv_lock);
+
+	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
+		list_move_tail(&unlink->list, &vdev->unlink_rx);
+		spin_unlock(&vdev->priv_lock);
+		return unlink;
+	}
+
+	spin_unlock(&vdev->priv_lock);
+
+	return NULL;
+}
+
+static int vhci_send_cmd_unlink(struct vhci_device *vdev)
+{
+	struct vhci_unlink *unlink = NULL;
+
+	struct msghdr msg;
+	struct kvec iov[3];
+	size_t txsize;
+
+	size_t total_size = 0;
+
+	while ((unlink = dequeue_from_unlink_tx(vdev)) != NULL) {
+		int ret;
+		struct usbip_header pdu_header;
+
+		txsize = 0;
+		memset(&pdu_header, 0, sizeof(pdu_header));
+		memset(&msg, 0, sizeof(msg));
+		memset(&iov, 0, sizeof(iov));
+
+		usbip_dbg_vhci_tx("setup cmd unlink, %lu\n", unlink->seqnum);
+
+		/* 1. setup usbip_header */
+		pdu_header.base.command = USBIP_CMD_UNLINK;
+		pdu_header.base.seqnum  = unlink->seqnum;
+		pdu_header.base.devid	= vdev->devid;
+		pdu_header.base.ep	= 0;
+		pdu_header.u.cmd_unlink.seqnum = unlink->unlink_seqnum;
+
+		usbip_header_correct_endian(&pdu_header, 1);
+
+		iov[0].iov_base = &pdu_header;
+		iov[0].iov_len  = sizeof(pdu_header);
+		txsize += sizeof(pdu_header);
+
+		ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 1, txsize);
+		if (ret != txsize) {
+			pr_err("sendmsg failed!, ret=%d for %zd\n", ret,
+			       txsize);
+			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP);
+			return -1;
+		}
+
+		usbip_dbg_vhci_tx("send txdata\n");
+
+		total_size += txsize;
+	}
+
+	return total_size;
+}
+
+int vhci_tx_loop(void *data)
+{
+	struct usbip_device *ud = data;
+	struct vhci_device *vdev = container_of(ud, struct vhci_device, ud);
+
+	while (!kthread_should_stop()) {
+		if (vhci_send_cmd_submit(vdev) < 0)
+			break;
+
+		if (vhci_send_cmd_unlink(vdev) < 0)
+			break;
+
+		wait_event_interruptible(vdev->waitq_tx,
+					 (!list_empty(&vdev->priv_tx) ||
+					  !list_empty(&vdev->unlink_tx) ||
+					  kthread_should_stop()));
+
+		usbip_dbg_vhci_tx("pending urbs ?, now wake up\n");
+	}
+
+	return 0;
+}
diff --git a/include/uapi/linux/usbip.h b/include/uapi/linux/usbip.h
new file mode 100644
index 000000000000..fa5db30ede36
--- /dev/null
+++ b/include/uapi/linux/usbip.h
@@ -0,0 +1,26 @@
+/*
+ *	usbip.h
+ *
+ *	USBIP uapi defines and function prototypes etc.
+*/
+
+#ifndef _UAPI_LINUX_USBIP_H
+#define _UAPI_LINUX_USBIP_H
+
+/* usbip device status - exported in usbip device sysfs status */
+enum usbip_device_status {
+	/* sdev is available. */
+	SDEV_ST_AVAILABLE = 0x01,
+	/* sdev is now used. */
+	SDEV_ST_USED,
+	/* sdev is unusable because of a fatal error. */
+	SDEV_ST_ERROR,
+
+	/* vdev does not connect a remote device. */
+	VDEV_ST_NULL,
+	/* vdev is used, but the USB address is not assigned yet */
+	VDEV_ST_NOTASSIGNED,
+	VDEV_ST_USED,
+	VDEV_ST_ERROR
+};
+#endif /* _UAPI_LINUX_USBIP_H */
-- 
cgit v1.2.3


From 0e227084aee36b3ba27b4fc9cd9e425be6ce2ab8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 12 Aug 2014 20:34:30 +0200
Subject: cfg80211: clarify BSS probe response vs. beacon data

There are a few possible cases of where BSS data came from:
 1) only a beacon has been received
 2) only a probe response has been received
 3) the driver didn't report what it received (this happens when
    using cfg80211_inform_bss[_width]())
 4) both probe response and beacon data has been received

Unfortunately, in the userspace API, a few things weren't there:
 a) there was no way to differentiate cases 1) and 4) above
    without comparing the data of the IEs
 b) the TSF was always from the last frame, instead of being
    exposed for beacon/probe response separately like IEs

Fix this by
   i) exporting a new flag attribute that indicates whether or
      not probe response data has been received - this addresses (a)
  ii) exporting a BEACON_TSF attribute that holds the beacon's TSF
      if a beacon has been received
 iii) not exporting the beacon attributes in case (3) above as that
      would just lead userspace into thinking the data actually came
      from a beacon when that isn't clear

To implement this, track inside the IEs struct whether or not it
(definitely) came from a beacon.

Reported-by: William Seto
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 16 ++++++++++++++--
 net/wireless/nl80211.c       | 16 ++++++++++++----
 net/wireless/scan.c          |  6 ++++--
 4 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7b8dac3efe8f..77b85a89abca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1503,12 +1503,14 @@ enum cfg80211_signal_type {
  * @tsf: TSF contained in the frame that carried these IEs
  * @rcu_head: internal use, for freeing
  * @len: length of the IEs
+ * @from_beacon: these IEs are known to come from a beacon
  * @data: IE data
  */
 struct cfg80211_bss_ies {
 	u64 tsf;
 	struct rcu_head rcu_head;
 	int len;
+	bool from_beacon;
 	u8 data[];
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f1db15b9c041..d097568da690 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3055,14 +3055,20 @@ enum nl80211_bss_scan_width {
  * @NL80211_BSS_BSSID: BSSID of the BSS (6 octets)
  * @NL80211_BSS_FREQUENCY: frequency in MHz (u32)
  * @NL80211_BSS_TSF: TSF of the received probe response/beacon (u64)
+ *	(if @NL80211_BSS_PRESP_DATA is present then this is known to be
+ *	from a probe response, otherwise it may be from the same beacon
+ *	that the NL80211_BSS_BEACON_TSF will be from)
  * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16)
  * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16)
  * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the
  *	raw information elements from the probe response/beacon (bin);
- *	if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are
- *	from a Probe Response frame; otherwise they are from a Beacon frame.
+ *	if the %NL80211_BSS_BEACON_IES attribute is present and the data is
+ *	different then the IEs here are from a Probe Response frame; otherwise
+ *	they are from a Beacon frame.
  *	However, if the driver does not indicate the source of the IEs, these
  *	IEs may be from either frame subtype.
+ *	If present, the @NL80211_BSS_PRESP_DATA attribute indicates that the
+ *	data here is known to be from a probe response, without any heuristics.
  * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon
  *	in mBm (100 * dBm) (s32)
  * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon
@@ -3074,6 +3080,10 @@ enum nl80211_bss_scan_width {
  *	yet been received
  * @NL80211_BSS_CHAN_WIDTH: channel width of the control channel
  *	(u32, enum nl80211_bss_scan_width)
+ * @NL80211_BSS_BEACON_TSF: TSF of the last received beacon (u64)
+ *	(not present if no beacon frame has been received yet)
+ * @NL80211_BSS_PRESP_DATA: the data in @NL80211_BSS_INFORMATION_ELEMENTS and
+ *	@NL80211_BSS_TSF is known to be from a probe response (flag attribute)
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -3091,6 +3101,8 @@ enum nl80211_bss {
 	NL80211_BSS_SEEN_MS_AGO,
 	NL80211_BSS_BEACON_IES,
 	NL80211_BSS_CHAN_WIDTH,
+	NL80211_BSS_BEACON_TSF,
+	NL80211_BSS_PRESP_DATA,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index df7b1332a1ec..3011401f52c0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6033,7 +6033,6 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 	const struct cfg80211_bss_ies *ies;
 	void *hdr;
 	struct nlattr *bss;
-	bool tsf = false;
 
 	ASSERT_WDEV_LOCK(wdev);
 
@@ -6060,18 +6059,27 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 		goto nla_put_failure;
 
 	rcu_read_lock();
+	/* indicate whether we have probe response data or not */
+	if (rcu_access_pointer(res->proberesp_ies) &&
+	    nla_put_flag(msg, NL80211_BSS_PRESP_DATA))
+		goto fail_unlock_rcu;
+
+	/* this pointer prefers to be pointed to probe response data
+	 * but is always valid
+	 */
 	ies = rcu_dereference(res->ies);
 	if (ies) {
 		if (nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf))
 			goto fail_unlock_rcu;
-		tsf = true;
 		if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS,
 					ies->len, ies->data))
 			goto fail_unlock_rcu;
 	}
+
+	/* and this pointer is always (unless driver didn't know) beacon data */
 	ies = rcu_dereference(res->beacon_ies);
-	if (ies) {
-		if (!tsf && nla_put_u64(msg, NL80211_BSS_TSF, ies->tsf))
+	if (ies && ies->from_beacon) {
+		if (nla_put_u64(msg, NL80211_BSS_BEACON_TSF, ies->tsf))
 			goto fail_unlock_rcu;
 		if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES,
 					ies->len, ies->data))
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0798c62e6085..ad1a1a2808d3 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -918,11 +918,12 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 	 * override the IEs pointer should we have received an earlier
 	 * indication of Probe Response data.
 	 */
-	ies = kmalloc(sizeof(*ies) + ielen, gfp);
+	ies = kzalloc(sizeof(*ies) + ielen, gfp);
 	if (!ies)
 		return NULL;
 	ies->len = ielen;
 	ies->tsf = tsf;
+	ies->from_beacon = false;
 	memcpy(ies->data, ie, ielen);
 
 	rcu_assign_pointer(tmp.pub.beacon_ies, ies);
@@ -982,11 +983,12 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 	if (!channel)
 		return NULL;
 
-	ies = kmalloc(sizeof(*ies) + ielen, gfp);
+	ies = kzalloc(sizeof(*ies) + ielen, gfp);
 	if (!ies)
 		return NULL;
 	ies->len = ielen;
 	ies->tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
+	ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control);
 	memcpy(ies->data, mgmt->u.probe_resp.variable, ielen);
 
 	if (ieee80211_is_probe_resp(mgmt->frame_control))
-- 
cgit v1.2.3


From f111f780ae1abf4cdc464f24293be90c010a04f6 Mon Sep 17 00:00:00 2001
From: Alexey Perevalov <a.perevalov@samsung.com>
Date: Wed, 20 Aug 2014 22:03:18 +0400
Subject: netfilter: nfnetlink_acct: add filter support to nfacct counter
 list/reset

You can use this to skip accounting objects when listing/resetting
via NFNL_MSG_ACCT_GET/NFNL_MSG_ACCT_GET_CTRZERO messages with the
NLM_F_DUMP netlink flag. The filtering covers the following cases:

1. No filter specified. In this case, the client will get old behaviour,
2. List/reset counter object only: In this case, you have to use
   NFACCT_F_QUOTA as mask and value 0.
3. List/reset quota objects only: You have to use NFACCT_F_QUOTA_PKTS
   as mask and value - the same, for byte based quota mask should be
   NFACCT_F_QUOTA_BYTES and value - the same.

If you want to obtain the object with any quota type
(ie. NFACCT_F_QUOTA_PKTS|NFACCT_F_QUOTA_BYTES), you need to perform
two dump requests, one to obtain NFACCT_F_QUOTA_PKTS objects and
another for NFACCT_F_QUOTA_BYTES.

Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_acct.h |  8 ++++
 net/netfilter/nfnetlink_acct.c                | 54 +++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index 51404ec19022..f3e34dbbf966 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -28,9 +28,17 @@ enum nfnl_acct_type {
 	NFACCT_USE,
 	NFACCT_FLAGS,
 	NFACCT_QUOTA,
+	NFACCT_FILTER,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
 
+enum nfnl_attr_filter_type {
+	NFACCT_FILTER_UNSPEC,
+	NFACCT_FILTER_MASK,
+	NFACCT_FILTER_VALUE,
+	__NFACCT_FILTER_MAX
+};
+#define NFACCT_FILTER_MAX (__NFACCT_FILTER_MAX - 1)
 
 #endif /* _UAPI_NFNL_ACCT_H_ */
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 3ea0eacbd970..c18af2f63eef 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -40,6 +40,11 @@ struct nf_acct {
 	char			data[0];
 };
 
+struct nfacct_filter {
+	u32 value;
+	u32 mask;
+};
+
 #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
 #define NFACCT_OVERQUOTA_BIT	2	/* NFACCT_F_OVERQUOTA */
 
@@ -181,6 +186,7 @@ static int
 nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nf_acct *cur, *last;
+	const struct nfacct_filter *filter = cb->data;
 
 	if (cb->args[2])
 		return 0;
@@ -197,6 +203,10 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 			last = NULL;
 		}
+
+		if (filter && (cur->flags & filter->mask) != filter->value)
+			continue;
+
 		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq,
 				       NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
@@ -211,6 +221,38 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int nfnl_acct_done(struct netlink_callback *cb)
+{
+	kfree(cb->data);
+	return 0;
+}
+
+static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
+	[NFACCT_FILTER_MASK]	= { .type = NLA_U32 },
+	[NFACCT_FILTER_VALUE]	= { .type = NLA_U32 },
+};
+
+static struct nfacct_filter *
+nfacct_filter_alloc(const struct nlattr * const attr)
+{
+	struct nfacct_filter *filter;
+	struct nlattr *tb[NFACCT_FILTER_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
+	if (!filter)
+		return ERR_PTR(-ENOMEM);
+
+	filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
+	filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
+
+	return filter;
+}
+
 static int
 nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
@@ -222,7 +264,18 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nfnl_acct_dump,
+			.done = nfnl_acct_done,
 		};
+
+		if (tb[NFACCT_FILTER]) {
+			struct nfacct_filter *filter;
+
+			filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
+			if (IS_ERR(filter))
+				return PTR_ERR(filter);
+
+			c.data = filter;
+		}
 		return netlink_dump_start(nfnl, skb, nlh, &c);
 	}
 
@@ -314,6 +367,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
 	[NFACCT_PKTS] = { .type = NLA_U64 },
 	[NFACCT_FLAGS] = { .type = NLA_U32 },
 	[NFACCT_QUOTA] = { .type = NLA_U64 },
+	[NFACCT_FILTER] = {.type = NLA_NESTED },
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
-- 
cgit v1.2.3


From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Aug 2014 17:04:46 -0700
Subject: net: dsa: reduce number of protocol hooks

DSA is currently registering one packet_type function per EtherType it
needs to intercept in the receive path of a DSA-enabled Ethernet device.
Right now we have three of them: trailer, DSA and eDSA, and there might
be more in the future, this will not scale to the addition of new
protocols.

This patch proceeds with adding a new layer of abstraction and two new
functions:

dsa_switch_rcv() which will dispatch into the tag-protocol specific
receive function implemented by net/dsa/tag_*.c

dsa_slave_xmit() which will dispatch into the tag-protocol specific
transmit function implemented by net/dsa/tag_*.c

When we do create the per-port slave network devices, we iterate over
the switch protocol to assign the DSA-specific receive and transmit
operations.

A new fake ethertype value is used: ETH_P_XDSA to illustrate the fact
that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER
like it used to be.

This allows us to greatly simplify the check in eth_type_trans() and
always override the skb->protocol with ETH_P_XDSA for Ethernet switches
tagged protocol, while also reducing the number repetitive slave
netdevice_ops assignments.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h     | 28 ++++++++++++---------------
 include/net/dsa.h             | 20 +++----------------
 include/uapi/linux/if_ether.h |  1 +
 net/dsa/dsa.c                 | 39 ++++++++++++++++++++-----------------
 net/dsa/dsa_priv.h            |  9 +++------
 net/dsa/slave.c               | 45 ++++++++++++++-----------------------------
 net/dsa/tag_dsa.c             |  8 ++++----
 net/dsa/tag_edsa.c            |  8 ++++----
 net/dsa/tag_trailer.c         |  8 ++++----
 net/ethernet/eth.c            |  7 ++-----
 10 files changed, 68 insertions(+), 105 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 039b23786c22..1875dc71422a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net)
 #endif
 }
 
-static inline bool netdev_uses_dsa_tags(struct net_device *dev)
+static inline bool netdev_uses_dsa(struct net_device *dev)
 {
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_dsa_tags(dev->dsa_ptr);
-#endif
-
-	return 0;
-}
-
-static inline bool netdev_uses_trailer_tags(struct net_device *dev)
-{
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	if (dev->dsa_ptr != NULL)
-		return dsa_uses_trailer_tags(dev->dsa_ptr);
+#ifdef CONFIG_NET_DSA
+	return dev->dsa_ptr != NULL;
+#else
+	return false;
 #endif
-
-	return 0;
 }
 
 /**
@@ -1933,6 +1922,13 @@ struct udp_offload {
 	struct offload_callbacks callbacks;
 };
 
+struct dsa_device_ops {
+	netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+	int (*rcv)(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev);
+};
+
+
 /* often modified stats are per cpu, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
 	u64     rx_packets;
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6efce384451e..6e26f1e4d8ce 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -59,6 +59,8 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
+struct dsa_device_ops;
+
 struct dsa_switch_tree {
 	/*
 	 * Configuration data for the platform device that owns
@@ -71,6 +73,7 @@ struct dsa_switch_tree {
 	 * protocol to use.
 	 */
 	struct net_device	*master_netdev;
+	const struct dsa_device_ops	*ops;
 	__be16			tag_protocol;
 
 	/*
@@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds)
 	return (void *)(ds + 1);
 }
 
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst)
-{
-	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
 #endif
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 0f8210b8e0bc..aa63ed023c2b 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -128,6 +128,7 @@
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
 #define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
+#define ETH_P_XDSA	0x00F8		/* Multiplexed DSA protocol	*/
 
 /*
  *	This is an Ethernet frame header.
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0a49632fac47..92e71d2a2ccd 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -608,6 +608,24 @@ static void dsa_shutdown(struct platform_device *pdev)
 {
 }
 
+static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
+			  struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+
+	if (unlikely(dst == NULL)) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return dst->ops->rcv(skb, dev, pt, orig_dev);
+}
+
+struct packet_type dsa_pack_type __read_mostly = {
+	.type	= cpu_to_be16(ETH_P_XDSA),
+	.func	= dsa_switch_rcv,
+};
+
 static const struct of_device_id dsa_of_match_table[] = {
 	{ .compatible = "marvell,dsa", },
 	{}
@@ -633,30 +651,15 @@ static int __init dsa_init_module(void)
 	if (rc)
 		return rc;
 
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	dev_add_pack(&dsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-	dev_add_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	dev_add_pack(&trailer_packet_type);
-#endif
+	dev_add_pack(&dsa_pack_type);
+
 	return 0;
 }
 module_init(dsa_init_module);
 
 static void __exit dsa_cleanup_module(void)
 {
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-	dev_remove_pack(&trailer_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-	dev_remove_pack(&edsa_packet_type);
-#endif
-#ifdef CONFIG_NET_DSA_TAG_DSA
-	dev_remove_pack(&dsa_packet_type);
-#endif
+	dev_remove_pack(&dsa_pack_type);
 	platform_driver_unregister(&dsa_driver);
 }
 module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d4cf5cc747e3..218d75d16f6f 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -45,16 +45,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds,
 				    int port, char *name);
 
 /* tag_dsa.c */
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type dsa_packet_type;
+extern const struct dsa_device_ops dsa_netdev_ops;
 
 /* tag_edsa.c */
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type edsa_packet_type;
+extern const struct dsa_device_ops edsa_netdev_ops;
 
 /* tag_trailer.c */
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev);
-extern struct packet_type trailer_packet_type;
+extern const struct dsa_device_ops trailer_netdev_ops;
 
 
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 45a1e34c89e0..ad1a913533aa 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -171,6 +171,14 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return -EOPNOTSUPP;
 }
 
+static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch_tree *dst = p->parent->dst;
+
+	return dst->ops->xmit(skb, dev);
+}
+
 
 /* ethtool operations *******************************************************/
 static int
@@ -293,42 +301,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_sset_count		= dsa_slave_get_sset_count,
 };
 
-#ifdef CONFIG_NET_DSA_TAG_DSA
-static const struct net_device_ops dsa_netdev_ops = {
+static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_init		= dsa_slave_init,
 	.ndo_open	 	= dsa_slave_open,
 	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= dsa_xmit,
+	.ndo_start_xmit		= dsa_slave_xmit,
 	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
 	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
 	.ndo_set_mac_address	= dsa_slave_set_mac_address,
 	.ndo_do_ioctl		= dsa_slave_ioctl,
 };
-#endif
-#ifdef CONFIG_NET_DSA_TAG_EDSA
-static const struct net_device_ops edsa_netdev_ops = {
-	.ndo_init		= dsa_slave_init,
-	.ndo_open	 	= dsa_slave_open,
-	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= edsa_xmit,
-	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
-	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
-	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_do_ioctl		= dsa_slave_ioctl,
-};
-#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
-static const struct net_device_ops trailer_netdev_ops = {
-	.ndo_init		= dsa_slave_init,
-	.ndo_open	 	= dsa_slave_open,
-	.ndo_stop		= dsa_slave_close,
-	.ndo_start_xmit		= trailer_xmit,
-	.ndo_change_rx_flags	= dsa_slave_change_rx_flags,
-	.ndo_set_rx_mode	= dsa_slave_set_rx_mode,
-	.ndo_set_mac_address	= dsa_slave_set_mac_address,
-	.ndo_do_ioctl		= dsa_slave_ioctl,
-};
-#endif
 
 /* slave device setup *******************************************************/
 struct net_device *
@@ -349,21 +331,22 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
+	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
 
 	switch (ds->dst->tag_protocol) {
 #ifdef CONFIG_NET_DSA_TAG_DSA
 	case htons(ETH_P_DSA):
-		slave_dev->netdev_ops = &dsa_netdev_ops;
+		ds->dst->ops = &dsa_netdev_ops;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_EDSA
 	case htons(ETH_P_EDSA):
-		slave_dev->netdev_ops = &edsa_netdev_ops;
+		ds->dst->ops = &edsa_netdev_ops;
 		break;
 #endif
 #ifdef CONFIG_NET_DSA_TAG_TRAILER
 	case htons(ETH_P_TRAILER):
-		slave_dev->netdev_ops = &trailer_netdev_ops;
+		ds->dst->ops = &trailer_netdev_ops;
 		break;
 #endif
 	default:
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cacce1e22f9c..d7dbc5bda5c0 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -16,7 +16,7 @@
 
 #define DSA_HLEN	4
 
-netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	u8 *dsa_header;
@@ -186,7 +186,7 @@ out:
 	return 0;
 }
 
-struct packet_type dsa_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_DSA),
-	.func	= dsa_rcv,
+const struct dsa_device_ops dsa_netdev_ops = {
+	.xmit	= dsa_xmit,
+	.rcv	= dsa_rcv,
 };
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index e70c43c25e64..6b30abe89183 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -17,7 +17,7 @@
 #define DSA_HLEN	4
 #define EDSA_HLEN	8
 
-netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	u8 *edsa_header;
@@ -205,7 +205,7 @@ out:
 	return 0;
 }
 
-struct packet_type edsa_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_EDSA),
-	.func	= edsa_rcv,
+const struct dsa_device_ops edsa_netdev_ops = {
+	.xmit	= edsa_xmit,
+	.rcv	= edsa_rcv,
 };
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 94bc260d015d..5fe9444842c5 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -14,7 +14,7 @@
 #include <linux/slab.h>
 #include "dsa_priv.h"
 
-netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct sk_buff *nskb;
@@ -114,7 +114,7 @@ out:
 	return 0;
 }
 
-struct packet_type trailer_packet_type __read_mostly = {
-	.type	= cpu_to_be16(ETH_P_TRAILER),
-	.func	= trailer_rcv,
+const struct dsa_device_ops trailer_netdev_ops = {
+	.xmit	= trailer_xmit,
+	.rcv	= trailer_rcv,
 };
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f405e0592407..5cebca134585 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -181,11 +181,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 * variants has been configured on the receiving interface,
 	 * and if so, set skb->protocol without looking at the packet.
 	 */
-	if (unlikely(netdev_uses_dsa_tags(dev)))
-		return htons(ETH_P_DSA);
-
-	if (unlikely(netdev_uses_trailer_tags(dev)))
-		return htons(ETH_P_TRAILER);
+	if (unlikely(netdev_uses_dsa(dev)))
+		return htons(ETH_P_XDSA);
 
 	if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
 		return eth->h_proto;
-- 
cgit v1.2.3


From 0f8a4de3e088797576ac76200b634b802e5c7781 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:37 +0200
Subject: KVM: Unconditionally export KVM_CAP_READONLY_MEM

The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to being supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_READONLY_MEM was guarded by #ifdef
__KVM_HAVE_READONLY_MEM, such discovery still required a user space
update.

Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the
in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 2 --
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cf3a2ff440e4..90d3edab839c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -738,9 +738,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
-#endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0817ee996e..1d03967def40 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -708,7 +708,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
 
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 
-- 
cgit v1.2.3


From 44b5ce73c99c389817be71b9161bceb197d40ecb Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:38 +0200
Subject: KVM: Unconditionally export KVM_CAP_USER_NMI

The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to being supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_USER_NMI was guarded by #ifdef
__KVM_HAVE_USER_NMI, such discovery still required a user space update.

Therefore, unconditionally export KVM_CAP_USER_NMI and change the
the typo in the comment for the IOCTL number definition as well.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 90d3edab839c..0695a1e3e332 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -654,9 +654,7 @@ struct kvm_ppc_smmu_info {
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
-#endif
 #ifdef __KVM_HAVE_GUEST_DEBUG
 #define KVM_CAP_SET_GUEST_DEBUG 23
 #endif
@@ -1091,7 +1089,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,   0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
 #define KVM_NMI                   _IO(KVMIO,   0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
-- 
cgit v1.2.3


From bfcfd44cce2774f19daeb59fb4e43fc9aa80e7b8 Mon Sep 17 00:00:00 2001
From: Filipe Brandenburger <filbranden@google.com>
Date: Fri, 29 Aug 2014 15:18:51 -0700
Subject: xattr: fix check for simultaneous glibc header inclusion

The guard was introduced in commit ea1a8217b06b ("xattr: guard against
simultaneous glibc header inclusion") but it is using #ifdef to check
for a define that is either set to 1 or 0.  Fix it to use #if instead.

* Without this patch:

  $ { echo "#include <sys/xattr.h>"; echo "#include <linux/xattr.h>"; } | gcc -E -Iinclude/uapi - >/dev/null
  include/uapi/linux/xattr.h:19:0: warning: "XATTR_CREATE" redefined [enabled by default]
   #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */
   ^
  /usr/include/x86_64-linux-gnu/sys/xattr.h:32:0: note: this is the location of the previous definition
   #define XATTR_CREATE XATTR_CREATE
   ^

* With this patch:

  $ { echo "#include <sys/xattr.h>"; echo "#include <linux/xattr.h>"; } | gcc -E -Iinclude/uapi - >/dev/null
  (no warnings)

Signed-off-by: Filipe Brandenburger <filbranden@google.com>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Cc: Allan McRae <allan@archlinux.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/xattr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index c38355c1f3c9..1590c49cae57 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -13,7 +13,7 @@
 #ifndef _UAPI_LINUX_XATTR_H
 #define _UAPI_LINUX_XATTR_H
 
-#ifdef __UAPI_DEF_XATTR
+#if __UAPI_DEF_XATTR
 #define __USE_KERNEL_XATTR_DEFS
 
 #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
-- 
cgit v1.2.3


From 880a6fab8f6ba5b5abe59ea68533202ddea1012c Mon Sep 17 00:00:00 2001
From: Christophe Gouault <christophe.gouault@6wind.com>
Date: Fri, 29 Aug 2014 16:16:05 +0200
Subject: xfrm: configure policy hash table thresholds by netlink

Enable to specify local and remote prefix length thresholds for the
policy hash table via a netlink XFRM_MSG_NEWSPDINFO message.

prefix length thresholds are specified by XFRMA_SPD_IPV4_HTHRESH and
XFRMA_SPD_IPV6_HTHRESH optional attributes (struct xfrmu_spdhthresh).

example:

    struct xfrmu_spdhthresh thresh4 = {
        .lbits = 0;
        .rbits = 24;
    };
    struct xfrmu_spdhthresh thresh6 = {
        .lbits = 0;
        .rbits = 56;
    };
    struct nlmsghdr *hdr;
    struct nl_msg *msg;

    msg = nlmsg_alloc();
    hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, XFRMA_SPD_IPV4_HTHRESH, sizeof(__u32), NLM_F_REQUEST);
    nla_put(msg, XFRMA_SPD_IPV4_HTHRESH, sizeof(thresh4), &thresh4);
    nla_put(msg, XFRMA_SPD_IPV6_HTHRESH, sizeof(thresh6), &thresh6);
    nla_send_auto(sk, msg);

The numbers are the policy selector minimum prefix lengths to put a
policy in the hash table.

- lbits is the local threshold (source address for out policies,
  destination address for in and fwd policies).

- rbits is the remote threshold (destination address for out
  policies, source address for in and fwd policies).

The default values are:

XFRMA_SPD_IPV4_HTHRESH: 32 32
XFRMA_SPD_IPV6_HTHRESH: 128 128

Dynamic re-building of the SPD is performed when the thresholds values
are changed.

The current thresholds can be read via a XFRM_MSG_GETSPDINFO request:
the kernel replies to XFRM_MSG_GETSPDINFO requests by an
XFRM_MSG_NEWSPDINFO message, with both attributes
XFRMA_SPD_IPV4_HTHRESH and XFRMA_SPD_IPV6_HTHRESH.

Signed-off-by: Christophe Gouault <christophe.gouault@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h  | 10 ++++++
 include/net/xfrm.h        |  1 +
 include/uapi/linux/xfrm.h |  7 ++++
 net/xfrm/xfrm_policy.c    | 87 +++++++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c      | 80 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 182 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 41902a8103bd..9da798256f0e 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -19,6 +19,15 @@ struct xfrm_policy_hash {
 	u8			sbits6;
 };
 
+struct xfrm_policy_hthresh {
+	struct work_struct	work;
+	seqlock_t		lock;
+	u8			lbits4;
+	u8			rbits4;
+	u8			lbits6;
+	u8			rbits6;
+};
+
 struct netns_xfrm {
 	struct list_head	state_all;
 	/*
@@ -45,6 +54,7 @@ struct netns_xfrm {
 	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX * 2];
 	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
 	struct work_struct	policy_hash_work;
+	struct xfrm_policy_hthresh policy_hthresh;
 
 
 	struct sock		*nlsk;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 721e9c3b11bd..dc4865e90fe4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1591,6 +1591,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
 				     u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
+void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 25e5dd916ba4..02d5125a5ee8 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -328,6 +328,8 @@ enum xfrm_spdattr_type_t {
 	XFRMA_SPD_UNSPEC,
 	XFRMA_SPD_INFO,
 	XFRMA_SPD_HINFO,
+	XFRMA_SPD_IPV4_HTHRESH,
+	XFRMA_SPD_IPV6_HTHRESH,
 	__XFRMA_SPD_MAX
 
 #define XFRMA_SPD_MAX (__XFRMA_SPD_MAX - 1)
@@ -347,6 +349,11 @@ struct xfrmu_spdhinfo {
 	__u32 spdhmcnt;
 };
 
+struct xfrmu_spdhthresh {
+	__u8 lbits;
+	__u8 rbits;
+};
+
 struct xfrm_usersa_info {
 	struct xfrm_selector		sel;
 	struct xfrm_id			id;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e6ff7b4046ea..55bcb8604bc6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -566,6 +566,86 @@ static void xfrm_hash_resize(struct work_struct *work)
 	mutex_unlock(&hash_resize_mutex);
 }
 
+static void xfrm_hash_rebuild(struct work_struct *work)
+{
+	struct net *net = container_of(work, struct net,
+				       xfrm.policy_hthresh.work);
+	unsigned int hmask;
+	struct xfrm_policy *pol;
+	struct xfrm_policy *policy;
+	struct hlist_head *chain;
+	struct hlist_head *odst;
+	struct hlist_node *newpos;
+	int i;
+	int dir;
+	unsigned seq;
+	u8 lbits4, rbits4, lbits6, rbits6;
+
+	mutex_lock(&hash_resize_mutex);
+
+	/* read selector prefixlen thresholds */
+	do {
+		seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
+
+		lbits4 = net->xfrm.policy_hthresh.lbits4;
+		rbits4 = net->xfrm.policy_hthresh.rbits4;
+		lbits6 = net->xfrm.policy_hthresh.lbits6;
+		rbits6 = net->xfrm.policy_hthresh.rbits6;
+	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
+
+	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+
+	/* reset the bydst and inexact table in all directions */
+	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
+		hmask = net->xfrm.policy_bydst[dir].hmask;
+		odst = net->xfrm.policy_bydst[dir].table;
+		for (i = hmask; i >= 0; i--)
+			INIT_HLIST_HEAD(odst + i);
+		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+			/* dir out => dst = remote, src = local */
+			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
+			net->xfrm.policy_bydst[dir].sbits4 = lbits4;
+			net->xfrm.policy_bydst[dir].dbits6 = rbits6;
+			net->xfrm.policy_bydst[dir].sbits6 = lbits6;
+		} else {
+			/* dir in/fwd => dst = local, src = remote */
+			net->xfrm.policy_bydst[dir].dbits4 = lbits4;
+			net->xfrm.policy_bydst[dir].sbits4 = rbits4;
+			net->xfrm.policy_bydst[dir].dbits6 = lbits6;
+			net->xfrm.policy_bydst[dir].sbits6 = rbits6;
+		}
+	}
+
+	/* re-insert all policies by order of creation */
+	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+		newpos = NULL;
+		chain = policy_hash_bysel(net, &policy->selector,
+					  policy->family,
+					  xfrm_policy_id2dir(policy->index));
+		hlist_for_each_entry(pol, chain, bydst) {
+			if (policy->priority >= pol->priority)
+				newpos = &pol->bydst;
+			else
+				break;
+		}
+		if (newpos)
+			hlist_add_behind(&policy->bydst, newpos);
+		else
+			hlist_add_head(&policy->bydst, chain);
+	}
+
+	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+	mutex_unlock(&hash_resize_mutex);
+}
+
+void xfrm_policy_hash_rebuild(struct net *net)
+{
+	schedule_work(&net->xfrm.policy_hthresh.work);
+}
+EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
+
 /* Generate new index... KAME seems to generate them ordered by cost
  * of an absolute inpredictability of ordering of rules. This will not pass. */
 static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
@@ -2872,9 +2952,16 @@ static int __net_init xfrm_policy_init(struct net *net)
 		htab->dbits6 = 128;
 		htab->sbits6 = 128;
 	}
+	net->xfrm.policy_hthresh.lbits4 = 32;
+	net->xfrm.policy_hthresh.rbits4 = 32;
+	net->xfrm.policy_hthresh.lbits6 = 128;
+	net->xfrm.policy_hthresh.rbits6 = 128;
+
+	seqlock_init(&net->xfrm.policy_hthresh.lock);
 
 	INIT_LIST_HEAD(&net->xfrm.policy_all);
 	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
+	INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
 	if (net_eq(net, &init_net))
 		register_netdevice_notifier(&xfrm_dev_notifier);
 	return 0;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d4db6ebb089d..eaf8a8f1cbe8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -964,7 +964,9 @@ static inline size_t xfrm_spdinfo_msgsize(void)
 {
 	return NLMSG_ALIGN(4)
 	       + nla_total_size(sizeof(struct xfrmu_spdinfo))
-	       + nla_total_size(sizeof(struct xfrmu_spdhinfo));
+	       + nla_total_size(sizeof(struct xfrmu_spdhinfo))
+	       + nla_total_size(sizeof(struct xfrmu_spdhthresh))
+	       + nla_total_size(sizeof(struct xfrmu_spdhthresh));
 }
 
 static int build_spdinfo(struct sk_buff *skb, struct net *net,
@@ -973,9 +975,11 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	struct xfrmk_spdinfo si;
 	struct xfrmu_spdinfo spc;
 	struct xfrmu_spdhinfo sph;
+	struct xfrmu_spdhthresh spt4, spt6;
 	struct nlmsghdr *nlh;
 	int err;
 	u32 *f;
+	unsigned lseq;
 
 	nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
 	if (nlh == NULL) /* shouldn't really happen ... */
@@ -993,9 +997,22 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	sph.spdhcnt = si.spdhcnt;
 	sph.spdhmcnt = si.spdhmcnt;
 
+	do {
+		lseq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
+
+		spt4.lbits = net->xfrm.policy_hthresh.lbits4;
+		spt4.rbits = net->xfrm.policy_hthresh.rbits4;
+		spt6.lbits = net->xfrm.policy_hthresh.lbits6;
+		spt6.rbits = net->xfrm.policy_hthresh.rbits6;
+	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, lseq));
+
 	err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc);
 	if (!err)
 		err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph);
+	if (!err)
+		err = nla_put(skb, XFRMA_SPD_IPV4_HTHRESH, sizeof(spt4), &spt4);
+	if (!err)
+		err = nla_put(skb, XFRMA_SPD_IPV6_HTHRESH, sizeof(spt6), &spt6);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -1004,6 +1021,51 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	return nlmsg_end(skb, nlh);
 }
 
+static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+			    struct nlattr **attrs)
+{
+	struct net *net = sock_net(skb->sk);
+	struct xfrmu_spdhthresh *thresh4 = NULL;
+	struct xfrmu_spdhthresh *thresh6 = NULL;
+
+	/* selector prefixlen thresholds to hash policies */
+	if (attrs[XFRMA_SPD_IPV4_HTHRESH]) {
+		struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH];
+
+		if (nla_len(rta) < sizeof(*thresh4))
+			return -EINVAL;
+		thresh4 = nla_data(rta);
+		if (thresh4->lbits > 32 || thresh4->rbits > 32)
+			return -EINVAL;
+	}
+	if (attrs[XFRMA_SPD_IPV6_HTHRESH]) {
+		struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH];
+
+		if (nla_len(rta) < sizeof(*thresh6))
+			return -EINVAL;
+		thresh6 = nla_data(rta);
+		if (thresh6->lbits > 128 || thresh6->rbits > 128)
+			return -EINVAL;
+	}
+
+	if (thresh4 || thresh6) {
+		write_seqlock(&net->xfrm.policy_hthresh.lock);
+		if (thresh4) {
+			net->xfrm.policy_hthresh.lbits4 = thresh4->lbits;
+			net->xfrm.policy_hthresh.rbits4 = thresh4->rbits;
+		}
+		if (thresh6) {
+			net->xfrm.policy_hthresh.lbits6 = thresh6->lbits;
+			net->xfrm.policy_hthresh.rbits6 = thresh6->rbits;
+		}
+		write_sequnlock(&net->xfrm.policy_hthresh.lock);
+
+		xfrm_policy_hash_rebuild(net);
+	}
+
+	return 0;
+}
+
 static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct nlattr **attrs)
 {
@@ -2274,6 +2336,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
 	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = sizeof(u32),
+	[XFRM_MSG_NEWSPDINFO  - XFRM_MSG_BASE] = sizeof(u32),
 	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = sizeof(u32),
 };
 
@@ -2308,10 +2371,17 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 };
 
+static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
+	[XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
+	[XFRMA_SPD_IPV6_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) },
+};
+
 static const struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
 	int (*done)(struct netlink_callback *);
+	const struct nla_policy *nla_pol;
+	int nla_max;
 } xfrm_dispatch[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_NEWSA       - XFRM_MSG_BASE] = { .doit = xfrm_add_sa        },
 	[XFRM_MSG_DELSA       - XFRM_MSG_BASE] = { .doit = xfrm_del_sa        },
@@ -2335,6 +2405,9 @@ static const struct xfrm_link {
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = { .doit = xfrm_get_ae  },
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
 	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo   },
+	[XFRM_MSG_NEWSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_set_spdinfo,
+						   .nla_pol = xfrma_spd_policy,
+						   .nla_max = XFRMA_SPD_MAX },
 	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo   },
 };
 
@@ -2371,8 +2444,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 	}
 
-	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX,
-			  xfrma_policy);
+	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
+			  link->nla_max ? : XFRMA_MAX,
+			  link->nla_pol ? : xfrma_policy);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 1df22b4ea9d91b01267fb61c155c31fb65d6b8a0 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Wed, 27 Aug 2014 22:58:45 +0200
Subject: usb: gadget: f_fs: add usb_functionfs_descs_head_v2 structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The structure can be used with user space tools that use the new
functionfs description format, for example as follows:

static const struct {
	struct usb_functionfs_descs_head_v2 header;
	__le32 fs_count;
	__le32 hs_count;
	struct {
		…
	} fs_desc;
	struct {
		…
	} hs_desc;
} descriptors = {
	.header = {
		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
		.length = cpu_to_le32(sizeof(descriptors)),
		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
				     FUNCTIONFS_HAS_HS_DESC)
	},
	.fs_count = cpu_to_le32(X),
	.fs_desc = {
		…
	},
	.hs_count = cpu_to_le32(Y),
	.hs_desc = {
		…
	}
};

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 0154b2859fd7..3ca03de7c2a8 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -32,6 +32,16 @@ struct usb_endpoint_descriptor_no_audio {
 	__u8  bInterval;
 } __attribute__((packed));
 
+struct usb_functionfs_descs_head_v2 {
+	__le32 magic;
+	__le32 length;
+	__le32 flags;
+	/*
+	 * __le32 fs_count, hs_count, fs_count; must be included manually in
+	 * the structure taking flags into consideration.
+	 */
+} __attribute__((packed));
+
 /* Legacy format, deprecated as of 3.14. */
 struct usb_functionfs_descs_head {
 	__le32 magic;
-- 
cgit v1.2.3


From 51c208c746e800dba37d1a54d3c5e601630266c4 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Wed, 27 Aug 2014 22:58:46 +0200
Subject: tools: ffs-test: convert to new descriptor format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit [ac8dde11: “Add flags to descriptors block”] functionfs
supports a new, more powerful and extensible, descriptor format.
Since ffs-test is probably the first thing users of the functionfs
interface see when they start writing functionfs user space daemons,
convert it to use the new format thus promoting it.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h |  2 +-
 tools/usb/ffs-test.c                | 14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 3ca03de7c2a8..6d2a16b834bf 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -102,7 +102,7 @@ struct usb_ext_prop_desc {
  * structure.  Any flags that are not recognised cause the whole block to be
  * rejected with -ENOSYS.
  *
- * Legacy descriptors format:
+ * Legacy descriptors format (deprecated as of 3.14):
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index a87e99f37c52..708d317b0f37 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -1,5 +1,5 @@
 /*
- * ffs-test.c.c -- user mode filesystem api for usb composite function
+ * ffs-test.c -- user mode filesystem api for usb composite function
  *
  * Copyright (C) 2010 Samsung Electronics
  *                    Author: Michal Nazarewicz <mina86@mina86.com>
@@ -106,7 +106,9 @@ static void _msg(unsigned level, const char *fmt, ...)
 /******************** Descriptors and Strings *******************************/
 
 static const struct {
-	struct usb_functionfs_descs_head header;
+	struct usb_functionfs_descs_head_v2 header;
+	__le32 fs_count;
+	__le32 hs_count;
 	struct {
 		struct usb_interface_descriptor intf;
 		struct usb_endpoint_descriptor_no_audio sink;
@@ -114,11 +116,12 @@ static const struct {
 	} __attribute__((packed)) fs_descs, hs_descs;
 } __attribute__((packed)) descriptors = {
 	.header = {
-		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC),
+		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
+				     FUNCTIONFS_HAS_HS_DESC),
 		.length = cpu_to_le32(sizeof descriptors),
-		.fs_count = cpu_to_le32(3),
-		.hs_count = cpu_to_le32(3),
 	},
+	.fs_count = cpu_to_le32(3),
 	.fs_descs = {
 		.intf = {
 			.bLength = sizeof descriptors.fs_descs.intf,
@@ -142,6 +145,7 @@ static const struct {
 			/* .wMaxPacketSize = autoconfiguration (kernel) */
 		},
 	},
+	.hs_count = cpu_to_le32(3),
 	.hs_descs = {
 		.intf = {
 			.bLength = sizeof descriptors.fs_descs.intf,
-- 
cgit v1.2.3


From eadf9e26fab7f9841adcc36f3559dbce7604fcd5 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 21 Aug 2014 16:49:16 -0300
Subject: [media] videodev2.h: add __user to v4l2_ext_control pointers

These are not copied to kernel space by video_usercopy, so mark them
as __user.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 778a3298fb34..0b1ba5c6a8d2 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1285,11 +1285,11 @@ struct v4l2_ext_control {
 	union {
 		__s32 value;
 		__s64 value64;
-		char *string;
-		__u8 *p_u8;
-		__u16 *p_u16;
-		__u32 *p_u32;
-		void *ptr;
+		char __user *string;
+		__u8 __user *p_u8;
+		__u16 __user *p_u16;
+		__u32 __user *p_u32;
+		void __user *ptr;
 	};
 } __attribute__ ((packed));
 
-- 
cgit v1.2.3


From 6fa9e1be7f28dd407d710c3ab367b1e5bc34c2bf Mon Sep 17 00:00:00 2001
From: Piotr Król <piotr.krol@3mdeb.com>
Date: Fri, 5 Sep 2014 00:58:02 +0200
Subject: usb: usbip: fix usbip.h path in userspace tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes: 588b48caf65c ("usbip: move usbip userspace code out of staging")
which introduced build failure by not changing uapi/usbip.h include path
according to new location.

Signed-off-by: Piotr Król <piotr.krol@3mdeb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/Kbuild             | 1 +
 tools/usb/usbip/libsrc/usbip_common.h | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033f8b3f..9955c3b0ef93 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -395,6 +395,7 @@ header-y += un.h
 header-y += unistd.h
 header-y += unix_diag.h
 header-y += usbdevice_fs.h
+header-y += usbip.h
 header-y += utime.h
 header-y += utsname.h
 header-y += uuid.h
diff --git a/tools/usb/usbip/libsrc/usbip_common.h b/tools/usb/usbip/libsrc/usbip_common.h
index 5a0e95edf4df..15fe792e1e96 100644
--- a/tools/usb/usbip/libsrc/usbip_common.h
+++ b/tools/usb/usbip/libsrc/usbip_common.h
@@ -15,7 +15,7 @@
 #include <syslog.h>
 #include <unistd.h>
 #include <linux/usb/ch9.h>
-#include "../../uapi/usbip.h"
+#include <linux/usbip.h>
 
 #ifndef USBIDS_FILE
 #define USBIDS_FILE "/usr/share/hwdata/usb.ids"
-- 
cgit v1.2.3


From 1c7e23bf50264a251de53ad9fb1604683b801258 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Wed, 3 Sep 2014 15:25:00 +0300
Subject: nl80211: Allow declaring RRM-related features

Radio Resource Measurement (RRM) is a bundle of features which will
require the entire stack to participate.
In this patch, the driver is given the opportunity to advertise the
device's support for these RRM-related features, using feature flags:
1. Support for Quiet IEs.
2. Support for adding DS Parameter Set IE to probe requests.
3. Support for adding WFA TPC Report IE to probe requests.
4. Support for inserting tx power value to tx-ed packets at a fixed
   offset. This is used in action frames, such as RRM's Link
   Measurement Report, where the actual tx power should be reported
   in the frame.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d097568da690..c166d3d23e55 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3968,6 +3968,16 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
  *	channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
  *	lifetime of a BSS.
+ * @NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES: This device adds a DS Parameter
+ *	Set IE to probe requests.
+ * @NL80211_FEATURE_WFA_TPC_IE_IN_PROBES: This device adds a WFA TPC Report IE
+ *	to probe requests.
+ * @NL80211_FEATURE_QUIET: This device, in client mode, supports Quiet Period
+ *	requests sent to it by an AP.
+ * @NL80211_FEATURE_TX_POWER_INSERTION: This device is capable of inserting the
+ *	current tx power value into the TPC Report IE in the spectrum
+ *	management TPC Report action frame, and in the Radio Measurement Link
+ *	Measurement Report action frame.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -3989,6 +3999,10 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_USERSPACE_MPM			= 1 << 16,
 	NL80211_FEATURE_ACTIVE_MONITOR			= 1 << 17,
 	NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE	= 1 << 18,
+	NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES	= 1 << 19,
+	NL80211_FEATURE_WFA_TPC_IE_IN_PROBES		= 1 << 20,
+	NL80211_FEATURE_QUIET				= 1 << 21,
+	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
 };
 
 /**
-- 
cgit v1.2.3


From bab5ab7d2a5466406e8003d038cc7ce6b2d5d804 Mon Sep 17 00:00:00 2001
From: Assaf Krauss <assaf.krauss@intel.com>
Date: Wed, 3 Sep 2014 15:25:01 +0300
Subject: nl80211: Add flag attribute for RRM connections

Add a flag attribute to use in associations, for tagging the target
connection as supporting RRM. It is the responsibility of upper
layers to set this flag only if both the underlying device, and the
target network indeed support RRM.
To be used in ASSOCIATE and CONNECT commands.

Signed-off-by: Assaf Krauss <assaf.krauss@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 net/wireless/nl80211.c       | 17 +++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6be68bb31918..a887eeb5b31e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1608,10 +1608,12 @@ struct cfg80211_auth_request {
  *
  * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
  * @ASSOC_REQ_DISABLE_VHT:  Disable VHT
+ * @ASSOC_REQ_USE_RRM: Declare RRM capability in this association
  */
 enum cfg80211_assoc_req_flags {
 	ASSOC_REQ_DISABLE_HT		= BIT(0),
 	ASSOC_REQ_DISABLE_VHT		= BIT(1),
+	ASSOC_REQ_USE_RRM		= BIT(2),
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c166d3d23e55..de69d3df5e55 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1594,6 +1594,17 @@ enum nl80211_commands {
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
  *
+ * @NL80211_ATTR_USE_RRM: flag for indicating whether the current connection
+ *	shall support Radio Resource Measurements (11k). This attribute can be
+ *	used with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests.
+ *	User space applications are expected to use this flag only if the
+ *	underlying device supports these minimal RRM features:
+ *		%NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES,
+ *		%NL80211_FEATURE_QUIET,
+ *	If this flag is used, driver must add the Power Capabilities IE to the
+ *	association request. In addition, it must also set the RRM capability
+ *	flag in the association request's Capability Info field.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1936,6 +1947,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TDLS_INITIATOR,
 
+	NL80211_ATTR_USE_RRM,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d876146498dd..459dc2769d0e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -389,6 +389,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
+	[NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -6584,6 +6585,14 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		       sizeof(req.vht_capa));
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
+		if (!(rdev->wiphy.features &
+		      NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
+		    !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+			return -EINVAL;
+		req.flags |= ASSOC_REQ_USE_RRM;
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
 	if (!err) {
 		wdev_lock(dev->ieee80211_ptr);
@@ -7241,6 +7250,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		       sizeof(connect.vht_capa));
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
+		if (!(rdev->wiphy.features &
+		      NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) ||
+		    !(rdev->wiphy.features & NL80211_FEATURE_QUIET))
+			return -EINVAL;
+		connect.flags |= ASSOC_REQ_USE_RRM;
+	}
+
 	wdev_lock(dev->ieee80211_ptr);
 	err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL);
 	wdev_unlock(dev->ieee80211_ptr);
-- 
cgit v1.2.3


From 3057dbfdab1b86a77ed6d512fc857b032f78663b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Thu, 4 Sep 2014 23:57:40 +0200
Subject: cfg80211: enable dynack through nl80211

Enable ACK timeout estimation algorithm (dynack) using mac80211
set_coverage_class API. Dynack is activated passing coverage class equals to -1
to lower drivers and it is automatically disabled setting valid value for
coverage class.
Define NL80211_ATTR_WIPHY_DYN_ACK flag attribute to enable dynack from
userspace. In order to activate dynack NL80211_FEATURE_ACKTO_ESTIMATION feature
flag must be set by lower drivers to indicate dynack capability.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 12 ++++++++++++
 net/wireless/nl80211.c       | 11 +++++++++++
 3 files changed, 25 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a887eeb5b31e..0d17ec9df692 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1805,6 +1805,7 @@ struct cfg80211_connect_params {
  * @WIPHY_PARAM_FRAG_THRESHOLD: wiphy->frag_threshold has changed
  * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed
  * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed
+ * @WIPHY_PARAM_DYN_ACK: dynack has been enabled
  */
 enum wiphy_params_flags {
 	WIPHY_PARAM_RETRY_SHORT		= 1 << 0,
@@ -1812,6 +1813,7 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_FRAG_THRESHOLD	= 1 << 2,
 	WIPHY_PARAM_RTS_THRESHOLD	= 1 << 3,
 	WIPHY_PARAM_COVERAGE_CLASS	= 1 << 4,
+	WIPHY_PARAM_DYN_ACK		= 1 << 5,
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index de69d3df5e55..29c4399e08b9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1605,6 +1605,12 @@ enum nl80211_commands {
  *	association request. In addition, it must also set the RRM capability
  *	flag in the association request's Capability Info field.
  *
+ * @NL80211_ATTR_WIPHY_DYN_ACK: flag attribute used to enable ACK timeout
+ *	estimation algorithm (dynack). In order to activate dynack
+ *	%NL80211_FEATURE_ACKTO_ESTIMATION feature flag must be set by lower
+ *	drivers to indicate dynack capability. Dynack is automatically disabled
+ *	setting valid value for coverage class.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1949,6 +1955,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_USE_RRM,
 
+	NL80211_ATTR_WIPHY_DYN_ACK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3991,6 +3999,9 @@ enum nl80211_ap_sme_features {
  *	current tx power value into the TPC Report IE in the spectrum
  *	management TPC Report action frame, and in the Radio Measurement Link
  *	Measurement Report action frame.
+ * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
+ *	estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
+ *	to enable dynack.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -4016,6 +4027,7 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_WFA_TPC_IE_IN_PROBES		= 1 << 20,
 	NL80211_FEATURE_QUIET				= 1 << 21,
 	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
+	NL80211_FEATURE_ACKTO_ESTIMATION		= 1 << 23,
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 459dc2769d0e..cf178d2b621d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -226,6 +226,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 },
+	[NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG },
 
 	[NL80211_ATTR_IFTYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
@@ -2239,11 +2240,21 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) {
+		if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK])
+			return -EINVAL;
+
 		coverage_class = nla_get_u8(
 			info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]);
 		changed |= WIPHY_PARAM_COVERAGE_CLASS;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) {
+		if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION))
+			return -EOPNOTSUPP;
+
+		changed |= WIPHY_PARAM_DYN_ACK;
+	}
+
 	if (changed) {
 		u8 old_retry_short, old_retry_long;
 		u32 old_frag_threshold, old_rts_threshold;
-- 
cgit v1.2.3


From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001
From: Govindarajulu Varadarajan <_govind@gmx.com>
Date: Wed, 3 Sep 2014 03:17:20 +0530
Subject: ethtool: Add generic options for tunables

This patch adds new ethtool cmd, ETHTOOL_GTUNABLE & ETHTOOL_STUNABLE for getting
tunable values from driver.

Add get_tunable and set_tunable to ethtool_ops. Driver implements these
functions for getting/setting tunable value.

Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  4 +++
 include/uapi/linux/ethtool.h | 28 +++++++++++++++
 net/core/ethtool.c           | 81 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index e658229fee39..c1a2d60dfb82 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -257,6 +257,10 @@ struct ethtool_ops {
 				     struct ethtool_eeprom *, u8 *);
 	int	(*get_eee)(struct net_device *, struct ethtool_eee *);
 	int	(*set_eee)(struct net_device *, struct ethtool_eee *);
+	int	(*get_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, void *);
+	int	(*set_tunable)(struct net_device *,
+			       const struct ethtool_tunable *, const void *);
 
 
 };
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index e3c7a719c76b..7a364f2f3d3f 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -209,6 +209,32 @@ struct ethtool_value {
 	__u32	data;
 };
 
+enum tunable_id {
+	ETHTOOL_ID_UNSPEC,
+	ETHTOOL_RX_COPYBREAK,
+};
+
+enum tunable_type_id {
+	ETHTOOL_TUNABLE_UNSPEC,
+	ETHTOOL_TUNABLE_U8,
+	ETHTOOL_TUNABLE_U16,
+	ETHTOOL_TUNABLE_U32,
+	ETHTOOL_TUNABLE_U64,
+	ETHTOOL_TUNABLE_STRING,
+	ETHTOOL_TUNABLE_S8,
+	ETHTOOL_TUNABLE_S16,
+	ETHTOOL_TUNABLE_S32,
+	ETHTOOL_TUNABLE_S64,
+};
+
+struct ethtool_tunable {
+	__u32	cmd;
+	__u32	id;
+	__u32	type_id;
+	__u32	len;
+	void	*data[0];
+};
+
 /**
  * struct ethtool_regs - hardware register dump
  * @cmd: Command number = %ETHTOOL_GREGS
@@ -1152,6 +1178,8 @@ enum ethtool_sfeatures_retval_bits {
 
 #define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
 #define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+#define ETHTOOL_GTUNABLE	0x00000048 /* Get tunable configuration */
+#define ETHTOOL_STUNABLE	0x00000049 /* Set tunable configuration */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 17cb912793fa..27e61b886520 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
 				      modinfo.eeprom_len);
 }
 
+static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
+{
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		if (tuna->len != sizeof(u32) ||
+		    tuna->type_id != ETHTOOL_TUNABLE_U32)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *data;
+
+	if (!ops->get_tunable)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	ret = ops->get_tunable(dev, &tuna, data);
+	if (ret)
+		goto out;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_to_user(useraddr, data, tuna.len))
+		goto out;
+	ret = 0;
+
+out:
+	kfree(data);
+	return ret;
+}
+
+static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
+{
+	int ret;
+	struct ethtool_tunable tuna;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	void *data;
+
+	if (!ops->set_tunable)
+		return -EOPNOTSUPP;
+	if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+		return -EFAULT;
+	ret = ethtool_tunable_valid(&tuna);
+	if (ret)
+		return ret;
+	data = kmalloc(tuna.len, GFP_USER);
+	if (!data)
+		return -ENOMEM;
+	useraddr += sizeof(tuna);
+	ret = -EFAULT;
+	if (copy_from_user(data, useraddr, tuna.len))
+		goto out;
+	ret = ops->set_tunable(dev, &tuna, data);
+
+out:
+	kfree(data);
+	return ret;
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
 	case ETHTOOL_GEEE:
+	case ETHTOOL_GTUNABLE:
 		break;
 	default:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GMODULEEEPROM:
 		rc = ethtool_get_module_eeprom(dev, useraddr);
 		break;
+	case ETHTOOL_GTUNABLE:
+		rc = ethtool_get_tunable(dev, useraddr);
+		break;
+	case ETHTOOL_STUNABLE:
+		rc = ethtool_set_tunable(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From d4288d3fac18bbc31cb6d369679b1fa1d9321ae9 Mon Sep 17 00:00:00 2001
From: Jurgen Kramer <gtmkramer@xs4all.nl>
Date: Fri, 5 Sep 2014 10:47:56 +0200
Subject: ALSA: pcm: add new DSD sampleformat for native DSD playback on XMOS
 based devices

XMOS based USB DACs with native DSD support expose this feature via a USB
alternate setting. The audio format is either 32-bit raw or a 32-bit PCM format.
To utilize this feature on linux this patch introduces a new 32-bit DSD
sampleformat DSD_U32_LE.
A follow up patch will add a quirk for XMOS based devices to utilize the new format.
Further patches will add support to alsa-lib.

Signed-off-by: Jurgen Kramer <gtmkramer@xs4all.nl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h         | 1 +
 include/uapi/sound/asound.h | 3 ++-
 sound/core/pcm.c            | 1 +
 sound/core/pcm_misc.c       | 4 ++++
 4 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 67e0bdb9f0fa..e862497f7556 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -183,6 +183,7 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_FMTBIT_G723_40_1B	_SNDRV_PCM_FMTBIT(G723_40_1B)
 #define SNDRV_PCM_FMTBIT_DSD_U8		_SNDRV_PCM_FMTBIT(DSD_U8)
 #define SNDRV_PCM_FMTBIT_DSD_U16_LE	_SNDRV_PCM_FMTBIT(DSD_U16_LE)
+#define SNDRV_PCM_FMTBIT_DSD_U32_LE	_SNDRV_PCM_FMTBIT(DSD_U32_LE)
 
 #ifdef SNDRV_LITTLE_ENDIAN
 #define SNDRV_PCM_FMTBIT_S16		SNDRV_PCM_FMTBIT_S16_LE
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 32168f7ffce3..6ee586728df9 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -219,7 +219,8 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_G723_40_1B	((__force snd_pcm_format_t) 47) /* 1 sample in 1 byte */
 #define	SNDRV_PCM_FORMAT_DSD_U8		((__force snd_pcm_format_t) 48) /* DSD, 1-byte samples DSD (x8) */
 #define	SNDRV_PCM_FORMAT_DSD_U16_LE	((__force snd_pcm_format_t) 49) /* DSD, 2-byte samples DSD (x16), little endian */
-#define	SNDRV_PCM_FORMAT_LAST		SNDRV_PCM_FORMAT_DSD_U16_LE
+#define	SNDRV_PCM_FORMAT_DSD_U32_LE	((__force snd_pcm_format_t) 50) /* DSD, 4-byte samples DSD (x32), little endian */
+#define	SNDRV_PCM_FORMAT_LAST		SNDRV_PCM_FORMAT_DSD_U32_LE
 
 #ifdef SNDRV_LITTLE_ENDIAN
 #define	SNDRV_PCM_FORMAT_S16		SNDRV_PCM_FORMAT_S16_LE
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index afccdc553ef9..42ded997b223 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -215,6 +215,7 @@ static char *snd_pcm_format_names[] = {
 	FORMAT(G723_40_1B),
 	FORMAT(DSD_U8),
 	FORMAT(DSD_U16_LE),
+	FORMAT(DSD_U32_LE),
 };
 
 const char *snd_pcm_format_name(snd_pcm_format_t format)
diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c
index 4560ca0e5651..83f54e1dbac9 100644
--- a/sound/core/pcm_misc.c
+++ b/sound/core/pcm_misc.c
@@ -148,6 +148,10 @@ static struct pcm_format_data pcm_formats[(INT)SNDRV_PCM_FORMAT_LAST+1] = {
 		.width = 16, .phys = 16, .le = 1, .signd = 0,
 		.silence = {},
 	},
+	[SNDRV_PCM_FORMAT_DSD_U32_LE] = {
+		.width = 32, .phys = 32, .le = 1, .signd = 0,
+		.silence = { 0x69, 0x69, 0x69, 0x69 },
+	},
 	/* FIXME: the following three formats are not defined properly yet */
 	[SNDRV_PCM_FORMAT_MPEG] = {
 		.le = -1, .signd = -1,
-- 
cgit v1.2.3


From 7611392fe8ff95ecae528b01a815ae3d72ca6b95 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 8 Sep 2014 14:42:12 -0700
Subject: Input: add INPUT_PROP_POINTING_STICK property

It is useful for userspace to know that there not dealing with a regular
mouse but rather with a pointing stick (e.g. a trackpoint) so that
userspace can e.g. automatically enable middle button scrollwheel
emulation.

It is impossible to tell the difference from the evdev info without
resorting to putting a list of device / driver names in userspace, this is
undesirable.

Add a property which allows userspace to see if a device is a pointing
stick, and set it on all the pointing stick drivers.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c          | 3 +++
 drivers/input/mouse/elantech.c      | 3 +++
 drivers/input/mouse/synaptics_usb.c | 1 +
 drivers/input/mouse/trackpoint.c    | 2 ++
 include/uapi/linux/input.h          | 1 +
 5 files changed, 10 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index a956b980ee73..9d9e5076d402 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2373,6 +2373,9 @@ int alps_init(struct psmouse *psmouse)
 	dev2->keybit[BIT_WORD(BTN_LEFT)] =
 		BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
+	if (priv->flags & ALPS_DUALPOINT)
+		__set_bit(INPUT_PROP_POINTING_STICK, dev2->propbit);
+
 	if (input_register_device(priv->dev2))
 		goto init_fail;
 
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index d71bd3635de1..0cbf6281899c 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1614,6 +1614,9 @@ int elantech_init(struct psmouse *psmouse)
 		tp_dev->keybit[BIT_WORD(BTN_LEFT)] =
 			BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) |
 			BIT_MASK(BTN_RIGHT);
+
+		__set_bit(INPUT_PROP_POINTING_STICK, tp_dev->propbit);
+
 		error = input_register_device(etd->tp_dev);
 		if (error < 0)
 			goto init_fail_tp_reg;
diff --git a/drivers/input/mouse/synaptics_usb.c b/drivers/input/mouse/synaptics_usb.c
index e122bda16aab..db3973d78207 100644
--- a/drivers/input/mouse/synaptics_usb.c
+++ b/drivers/input/mouse/synaptics_usb.c
@@ -387,6 +387,7 @@ static int synusb_probe(struct usb_interface *intf,
 		__set_bit(EV_REL, input_dev->evbit);
 		__set_bit(REL_X, input_dev->relbit);
 		__set_bit(REL_Y, input_dev->relbit);
+		__set_bit(INPUT_PROP_POINTING_STICK, input_dev->propbit);
 		input_set_abs_params(input_dev, ABS_PRESSURE, 0, 127, 0, 0);
 	} else {
 		input_set_abs_params(input_dev, ABS_X,
diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index ca843b6cf6bd..b377462748a2 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -393,6 +393,8 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties)
 	if ((button_info & 0x0f) >= 3)
 		__set_bit(BTN_MIDDLE, psmouse->dev->keybit);
 
+	__set_bit(INPUT_PROP_POINTING_STICK, psmouse->dev->propbit);
+
 	trackpoint_defaults(psmouse->private);
 
 	error = trackpoint_power_on_reset(&psmouse->ps2dev);
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 19df18c9b8be..1874ebe9ac1e 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -165,6 +165,7 @@ struct input_keymap_entry {
 #define INPUT_PROP_BUTTONPAD		0x02	/* has button(s) under pad */
 #define INPUT_PROP_SEMI_MT		0x03	/* touch rectangle only */
 #define INPUT_PROP_TOPBUTTONPAD		0x04	/* softbuttons at top of pad */
+#define INPUT_PROP_POINTING_STICK	0x05	/* is a pointing stick */
 
 #define INPUT_PROP_MAX			0x1f
 #define INPUT_PROP_CNT			(INPUT_PROP_MAX + 1)
-- 
cgit v1.2.3


From ff7693d079e58fb62d735b7b8085b53fcfb74528 Mon Sep 17 00:00:00 2001
From: Carlo Caione <carlo@caione.org>
Date: Sun, 17 Aug 2014 12:49:49 +0200
Subject: ARM: meson: serial: add MesonX SoC on-chip uart driver

The SoC has four fully functional UARTs which use the same programming
model. They are named UART_A, UART_B, UART_C and UART_AO (Always-On)
which cannot be powered off.

Signed-off-by: Carlo Caione <carlo@caione.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig       |  18 ++
 drivers/tty/serial/Makefile      |   1 +
 drivers/tty/serial/meson_uart.c  | 634 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h |   3 +
 4 files changed, 656 insertions(+)
 create mode 100644 drivers/tty/serial/meson_uart.c

(limited to 'include/uapi')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 75cc59f5d253..636949d8144d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -200,6 +200,24 @@ config SERIAL_KS8695_CONSOLE
 	  receives all kernel messages and warnings and which allows
 	  logins in single user mode).
 
+config SERIAL_MESON
+	tristate "Meson serial port support"
+	depends on ARCH_MESON
+	select SERIAL_CORE
+	help
+	  This enables the driver for the on-chip UARTs of the Amlogic
+	  MesonX processors.
+
+config SERIAL_MESON_CONSOLE
+	bool "Support for console on meson"
+	depends on SERIAL_MESON=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  Say Y here if you wish to use a Amlogic MesonX UART as the
+	  system console (the system console is the device which
+	  receives all kernel messages and warnings and which allows
+	  logins in single user mode) as /dev/ttyAMLx.
+
 config SERIAL_CLPS711X
 	tristate "CLPS711X serial port support"
 	depends on ARCH_CLPS711X || COMPILE_TEST
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 0080cc362e09..9a548acf5fdc 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_SERIAL_MPC52xx) += mpc52xx_uart.o
 obj-$(CONFIG_SERIAL_ICOM) += icom.o
 obj-$(CONFIG_SERIAL_M32R_SIO) += m32r_sio.o
 obj-$(CONFIG_SERIAL_MPSC) += mpsc.o
+obj-$(CONFIG_SERIAL_MESON) += meson_uart.o
 obj-$(CONFIG_SERIAL_SB1250_DUART) += sb1250-duart.o
 obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o
 obj-$(CONFIG_SERIAL_SCCNXP) += sccnxp.o
diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c
new file mode 100644
index 000000000000..15c749753317
--- /dev/null
+++ b/drivers/tty/serial/meson_uart.c
@@ -0,0 +1,634 @@
+/*
+ *  Based on meson_uart.c, by AMLOGIC, INC.
+ *
+ * Copyright (C) 2014 Carlo Caione <carlo@caione.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+/* Register offsets */
+#define AML_UART_WFIFO			0x00
+#define AML_UART_RFIFO			0x04
+#define AML_UART_CONTROL		0x08
+#define AML_UART_STATUS			0x0c
+#define AML_UART_MISC			0x10
+#define AML_UART_REG5			0x14
+
+/* AML_UART_CONTROL bits */
+#define AML_UART_TX_EN			BIT(12)
+#define AML_UART_RX_EN			BIT(13)
+#define AML_UART_TX_RST			BIT(22)
+#define AML_UART_RX_RST			BIT(23)
+#define AML_UART_CLR_ERR		BIT(24)
+#define AML_UART_RX_INT_EN		BIT(27)
+#define AML_UART_TX_INT_EN		BIT(28)
+#define AML_UART_DATA_LEN_MASK		(0x03 << 20)
+#define AML_UART_DATA_LEN_8BIT		(0x00 << 20)
+#define AML_UART_DATA_LEN_7BIT		(0x01 << 20)
+#define AML_UART_DATA_LEN_6BIT		(0x02 << 20)
+#define AML_UART_DATA_LEN_5BIT		(0x03 << 20)
+
+/* AML_UART_STATUS bits */
+#define AML_UART_PARITY_ERR		BIT(16)
+#define AML_UART_FRAME_ERR		BIT(17)
+#define AML_UART_TX_FIFO_WERR		BIT(18)
+#define AML_UART_RX_EMPTY		BIT(20)
+#define AML_UART_TX_FULL		BIT(21)
+#define AML_UART_TX_EMPTY		BIT(22)
+#define AML_UART_ERR			(AML_UART_PARITY_ERR | \
+					 AML_UART_FRAME_ERR  | \
+					 AML_UART_TX_FIFO_WERR)
+
+/* AML_UART_CONTROL bits */
+#define AML_UART_TWO_WIRE_EN		BIT(15)
+#define AML_UART_PARITY_TYPE		BIT(18)
+#define AML_UART_PARITY_EN		BIT(19)
+#define AML_UART_CLEAR_ERR		BIT(24)
+#define AML_UART_STOP_BIN_LEN_MASK	(0x03 << 16)
+#define AML_UART_STOP_BIN_1SB		(0x00 << 16)
+#define AML_UART_STOP_BIN_2SB		(0x01 << 16)
+
+/* AML_UART_MISC bits */
+#define AML_UART_XMIT_IRQ(c)		(((c) & 0xff) << 8)
+#define AML_UART_RECV_IRQ(c)		((c) & 0xff)
+
+/* AML_UART_REG5 bits */
+#define AML_UART_BAUD_MASK		0x7fffff
+#define AML_UART_BAUD_USE		BIT(23)
+
+#define AML_UART_PORT_NUM		6
+#define AML_UART_DEV_NAME		"ttyAML"
+
+
+static struct uart_driver meson_uart_driver;
+
+static struct uart_port *meson_ports[AML_UART_PORT_NUM];
+
+static void meson_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static unsigned int meson_uart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CTS;
+}
+
+static unsigned int meson_uart_tx_empty(struct uart_port *port)
+{
+	u32 val;
+
+	val = readl(port->membase + AML_UART_STATUS);
+	return (val & AML_UART_TX_EMPTY) ? TIOCSER_TEMT : 0;
+}
+
+static void meson_uart_stop_tx(struct uart_port *port)
+{
+	u32 val;
+
+	val = readl(port->membase + AML_UART_CONTROL);
+	val &= ~AML_UART_TX_EN;
+	writel(val, port->membase + AML_UART_CONTROL);
+}
+
+static void meson_uart_stop_rx(struct uart_port *port)
+{
+	u32 val;
+
+	val = readl(port->membase + AML_UART_CONTROL);
+	val &= ~AML_UART_RX_EN;
+	writel(val, port->membase + AML_UART_CONTROL);
+}
+
+static void meson_uart_shutdown(struct uart_port *port)
+{
+	unsigned long flags;
+	u32 val;
+
+	free_irq(port->irq, port);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	val = readl(port->membase + AML_UART_CONTROL);
+	val &= ~(AML_UART_RX_EN | AML_UART_TX_EN);
+	val &= ~(AML_UART_RX_INT_EN | AML_UART_TX_INT_EN);
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void meson_uart_start_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned int ch;
+
+	if (uart_tx_stopped(port)) {
+		meson_uart_stop_tx(port);
+		return;
+	}
+
+	while (!(readl(port->membase + AML_UART_STATUS) & AML_UART_TX_FULL)) {
+		if (port->x_char) {
+			writel(port->x_char, port->membase + AML_UART_WFIFO);
+			port->icount.tx++;
+			port->x_char = 0;
+			continue;
+		}
+
+		if (uart_circ_empty(xmit))
+			break;
+
+		ch = xmit->buf[xmit->tail];
+		writel(ch, port->membase + AML_UART_WFIFO);
+		xmit->tail = (xmit->tail+1) & (SERIAL_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+}
+
+static void meson_receive_chars(struct uart_port *port)
+{
+	struct tty_port *tport = &port->state->port;
+	char flag;
+	u32 status, ch, mode;
+
+	do {
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+		status = readl(port->membase + AML_UART_STATUS);
+
+		if (status & AML_UART_ERR) {
+			if (status & AML_UART_TX_FIFO_WERR)
+				port->icount.overrun++;
+			else if (status & AML_UART_FRAME_ERR)
+				port->icount.frame++;
+			else if (status & AML_UART_PARITY_ERR)
+				port->icount.frame++;
+
+			mode = readl(port->membase + AML_UART_CONTROL);
+			mode |= AML_UART_CLEAR_ERR;
+			writel(mode, port->membase + AML_UART_CONTROL);
+
+			/* It doesn't clear to 0 automatically */
+			mode &= ~AML_UART_CLEAR_ERR;
+			writel(mode, port->membase + AML_UART_CONTROL);
+
+			status &= port->read_status_mask;
+			if (status & AML_UART_FRAME_ERR)
+				flag = TTY_FRAME;
+			else if (status & AML_UART_PARITY_ERR)
+				flag = TTY_PARITY;
+		}
+
+		ch = readl(port->membase + AML_UART_RFIFO);
+		ch &= 0xff;
+
+		if ((status & port->ignore_status_mask) == 0)
+			tty_insert_flip_char(tport, ch, flag);
+
+		if (status & AML_UART_TX_FIFO_WERR)
+			tty_insert_flip_char(tport, 0, TTY_OVERRUN);
+
+	} while (!(readl(port->membase + AML_UART_STATUS) & AML_UART_RX_EMPTY));
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(tport);
+	spin_lock(&port->lock);
+}
+
+static irqreturn_t meson_uart_interrupt(int irq, void *dev_id)
+{
+	struct uart_port *port = (struct uart_port *)dev_id;
+
+	spin_lock(&port->lock);
+
+	if (!(readl(port->membase + AML_UART_STATUS) & AML_UART_RX_EMPTY))
+		meson_receive_chars(port);
+
+	if (!(readl(port->membase + AML_UART_STATUS) & AML_UART_TX_FULL))
+		meson_uart_start_tx(port);
+
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static const char *meson_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_MESON) ? "meson_uart" : NULL;
+}
+
+static int meson_uart_startup(struct uart_port *port)
+{
+	u32 val;
+	int ret = 0;
+
+	val = readl(port->membase + AML_UART_CONTROL);
+	val |= (AML_UART_RX_RST | AML_UART_TX_RST | AML_UART_CLR_ERR);
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	val &= ~(AML_UART_RX_RST | AML_UART_TX_RST | AML_UART_CLR_ERR);
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	val |= (AML_UART_RX_EN | AML_UART_TX_EN);
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	val |= (AML_UART_RX_INT_EN | AML_UART_TX_INT_EN);
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	val = (AML_UART_RECV_IRQ(1) | AML_UART_XMIT_IRQ(port->fifosize / 2));
+	writel(val, port->membase + AML_UART_MISC);
+
+	ret = request_irq(port->irq, meson_uart_interrupt, 0,
+			  meson_uart_type(port), port);
+
+	return ret;
+}
+
+static void meson_uart_change_speed(struct uart_port *port, unsigned long baud)
+{
+	u32 val;
+
+	while (!(readl(port->membase + AML_UART_STATUS) & AML_UART_TX_EMPTY))
+		cpu_relax();
+
+	val = readl(port->membase + AML_UART_REG5);
+	val &= ~AML_UART_BAUD_MASK;
+	val = ((port->uartclk * 10 / (baud * 4) + 5) / 10) - 1;
+	val |= AML_UART_BAUD_USE;
+	writel(val, port->membase + AML_UART_REG5);
+}
+
+static void meson_uart_set_termios(struct uart_port *port,
+				   struct ktermios *termios,
+				   struct ktermios *old)
+{
+	unsigned int cflags, iflags, baud;
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	cflags = termios->c_cflag;
+	iflags = termios->c_iflag;
+
+	val = readl(port->membase + AML_UART_CONTROL);
+
+	val &= ~AML_UART_DATA_LEN_MASK;
+	switch (cflags & CSIZE) {
+	case CS8:
+		val |= AML_UART_DATA_LEN_8BIT;
+		break;
+	case CS7:
+		val |= AML_UART_DATA_LEN_7BIT;
+		break;
+	case CS6:
+		val |= AML_UART_DATA_LEN_6BIT;
+		break;
+	case CS5:
+		val |= AML_UART_DATA_LEN_5BIT;
+		break;
+	}
+
+	if (cflags & PARENB)
+		val |= AML_UART_PARITY_EN;
+	else
+		val &= ~AML_UART_PARITY_EN;
+
+	if (cflags & PARODD)
+		val |= AML_UART_PARITY_TYPE;
+	else
+		val &= ~AML_UART_PARITY_TYPE;
+
+	val &= ~AML_UART_STOP_BIN_LEN_MASK;
+	if (cflags & CSTOPB)
+		val |= AML_UART_STOP_BIN_2SB;
+	else
+		val &= ~AML_UART_STOP_BIN_1SB;
+
+	if (cflags & CRTSCTS)
+		val &= ~AML_UART_TWO_WIRE_EN;
+	else
+		val |= AML_UART_TWO_WIRE_EN;
+
+	writel(val, port->membase + AML_UART_CONTROL);
+
+	baud = uart_get_baud_rate(port, termios, old, 9600, 115200);
+	meson_uart_change_speed(port, baud);
+
+	port->read_status_mask = AML_UART_TX_FIFO_WERR;
+	if (iflags & INPCK)
+		port->read_status_mask |= AML_UART_PARITY_ERR |
+					  AML_UART_FRAME_ERR;
+
+	port->ignore_status_mask = 0;
+	if (iflags & IGNPAR)
+		port->ignore_status_mask |= AML_UART_PARITY_ERR |
+					    AML_UART_FRAME_ERR;
+
+	uart_update_timeout(port, termios->c_cflag, baud);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static int meson_uart_verify_port(struct uart_port *port,
+				  struct serial_struct *ser)
+{
+	int ret = 0;
+
+	if (port->type != PORT_MESON)
+		ret = -EINVAL;
+	if (port->irq != ser->irq)
+		ret = -EINVAL;
+	if (ser->baud_base < 9600)
+		ret = -EINVAL;
+	return ret;
+}
+
+static void meson_uart_release_port(struct uart_port *port)
+{
+	if (port->flags & UPF_IOREMAP) {
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+}
+
+static int meson_uart_request_port(struct uart_port *port)
+{
+	struct platform_device *pdev = to_platform_device(port->dev);
+	struct resource *res;
+	int size;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "cannot obtain I/O memory region");
+		return -ENODEV;
+	}
+	size = resource_size(res);
+
+	if (!devm_request_mem_region(port->dev, port->mapbase, size,
+				     dev_name(port->dev))) {
+		dev_err(port->dev, "Memory region busy\n");
+		return -EBUSY;
+	}
+
+	if (port->flags & UPF_IOREMAP) {
+		port->membase = devm_ioremap_nocache(port->dev,
+						     port->mapbase,
+						     size);
+		if (port->membase == NULL)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void meson_uart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE) {
+		port->type = PORT_MESON;
+		meson_uart_request_port(port);
+	}
+}
+
+static struct uart_ops meson_uart_ops = {
+	.set_mctrl      = meson_uart_set_mctrl,
+	.get_mctrl      = meson_uart_get_mctrl,
+	.tx_empty	= meson_uart_tx_empty,
+	.start_tx	= meson_uart_start_tx,
+	.stop_tx	= meson_uart_stop_tx,
+	.stop_rx	= meson_uart_stop_rx,
+	.startup	= meson_uart_startup,
+	.shutdown	= meson_uart_shutdown,
+	.set_termios	= meson_uart_set_termios,
+	.type		= meson_uart_type,
+	.config_port	= meson_uart_config_port,
+	.request_port	= meson_uart_request_port,
+	.release_port	= meson_uart_release_port,
+	.verify_port	= meson_uart_verify_port,
+};
+
+#ifdef CONFIG_SERIAL_MESON_CONSOLE
+
+static void meson_console_putchar(struct uart_port *port, int ch)
+{
+	if (!port->membase)
+		return;
+
+	while (readl(port->membase + AML_UART_STATUS) & AML_UART_TX_FULL)
+		cpu_relax();
+	writel(ch, port->membase + AML_UART_WFIFO);
+}
+
+static void meson_serial_console_write(struct console *co, const char *s,
+				       u_int count)
+{
+	struct uart_port *port;
+	unsigned long flags;
+	int locked;
+
+	port = meson_ports[co->index];
+	if (!port)
+		return;
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else {
+		spin_lock(&port->lock);
+		locked = 1;
+	}
+
+	uart_console_write(port, s, count, meson_console_putchar);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
+}
+
+static int meson_serial_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index < 0 || co->index >= AML_UART_PORT_NUM)
+		return -EINVAL;
+
+	port = meson_ports[co->index];
+	if (!port || !port->membase)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct console meson_serial_console = {
+	.name		= AML_UART_DEV_NAME,
+	.write		= meson_serial_console_write,
+	.device		= uart_console_device,
+	.setup		= meson_serial_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &meson_uart_driver,
+};
+
+static int __init meson_serial_console_init(void)
+{
+	register_console(&meson_serial_console);
+	return 0;
+}
+console_initcall(meson_serial_console_init);
+
+#define MESON_SERIAL_CONSOLE	(&meson_serial_console)
+#else
+#define MESON_SERIAL_CONSOLE	NULL
+#endif
+
+static struct uart_driver meson_uart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= "meson_uart",
+	.dev_name	= AML_UART_DEV_NAME,
+	.nr		= AML_UART_PORT_NUM,
+	.cons		= MESON_SERIAL_CONSOLE,
+};
+
+static int meson_uart_probe(struct platform_device *pdev)
+{
+	struct resource *res_mem, *res_irq;
+	struct uart_port *port;
+	struct clk *clk;
+	int ret = 0;
+
+	if (pdev->dev.of_node)
+		pdev->id = of_alias_get_id(pdev->dev.of_node, "serial");
+
+	if (pdev->id < 0 || pdev->id >= AML_UART_PORT_NUM)
+		return -EINVAL;
+
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res_mem)
+		return -ENODEV;
+
+	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res_irq)
+		return -ENODEV;
+
+	if (meson_ports[pdev->id]) {
+		dev_err(&pdev->dev, "port %d already allocated\n", pdev->id);
+		return -EBUSY;
+	}
+
+	port = devm_kzalloc(&pdev->dev, sizeof(struct uart_port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	port->uartclk = clk_get_rate(clk);
+	port->iotype = UPIO_MEM;
+	port->mapbase = res_mem->start;
+	port->irq = res_irq->start;
+	port->flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP | UPF_LOW_LATENCY;
+	port->dev = &pdev->dev;
+	port->line = pdev->id;
+	port->type = PORT_MESON;
+	port->x_char = 0;
+	port->ops = &meson_uart_ops;
+	port->fifosize = 64;
+
+	meson_ports[pdev->id] = port;
+	platform_set_drvdata(pdev, port);
+
+	ret = uart_add_one_port(&meson_uart_driver, port);
+	if (ret)
+		meson_ports[pdev->id] = NULL;
+
+	return ret;
+}
+
+static int meson_uart_remove(struct platform_device *pdev)
+{
+	struct uart_port *port;
+
+	port = platform_get_drvdata(pdev);
+	uart_remove_one_port(&meson_uart_driver, port);
+	meson_ports[pdev->id] = NULL;
+
+	return 0;
+}
+
+
+static const struct of_device_id meson_uart_dt_match[] = {
+	{ .compatible = "amlogic,meson-uart" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, meson_uart_dt_match);
+
+static  struct platform_driver meson_uart_platform_driver = {
+	.probe		= meson_uart_probe,
+	.remove		= meson_uart_remove,
+	.driver		= {
+		.owner		= THIS_MODULE,
+		.name		= "meson_uart",
+		.of_match_table	= meson_uart_dt_match,
+	},
+};
+
+static int __init meson_uart_init(void)
+{
+	int ret;
+
+	ret = uart_register_driver(&meson_uart_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&meson_uart_platform_driver);
+	if (ret)
+		uart_unregister_driver(&meson_uart_driver);
+
+	return ret;
+}
+
+static void __exit meson_uart_exit(void)
+{
+	platform_driver_unregister(&meson_uart_platform_driver);
+	uart_unregister_driver(&meson_uart_driver);
+}
+
+module_init(meson_uart_init);
+module_exit(meson_uart_exit);
+
+MODULE_AUTHOR("Carlo Caione <carlo@caione.org>");
+MODULE_DESCRIPTION("Amlogic Meson serial port driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 5820269aa132..16ad8521af6a 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -244,4 +244,7 @@
 /* SC16IS74xx */
 #define PORT_SC16IS7XX   108
 
+/* MESON */
+#define PORT_MESON	109
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3


From f3dbd802b3caf8da92173870bc270dda6b3f84ba Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatxjain@gmail.com>
Date: Tue, 2 Sep 2014 16:26:00 -0700
Subject: PCI: Enable CRS Software Visibility for root port if it is supported

Per PCIe r3.0, sec 2.3.2, an endpoint may respond to a Configuration
Request with a Completion with Configuration Request Retry Status (CRS).
This terminates the Configuration Request.

When the CRS Software Visibility feature is disabled (as it is by default),
a Root Complex must handle a CRS Completion by re-issuing the Configuration
Request.  This is invisible to software.  From the CPU's point of view, an
endpoint that always responds with CRS causes a hang because the Root
Complex never supplies data to complete the CPU read.

When CRS Software Visibility is enabled, a Root Complex that receives a CRS
Completion for a read of the Vendor ID must return data of 0x0001.  The
Vendor ID of 0x0001 indicates to software that the endpoint is not ready.

We now have more devices that require CRS Software Visibility.  For
example, a PLX 8713 NT bridge may respond with CRS until it has been
configured via I2C, and the I2C configuration is completely independent of
PCI enumeration.

Enable CRS Software Visibility if it is supported.  This allows a system
with such a device to work (though the PCI core times out waiting for it to
become ready, and we have to rescan the bus after it is ready).

This essentially reverts ad7edfe04908 ("[PCI] Do not enable CRS Software
Visibility by default").  The failures that led to ad7edfe04908 should be
addressed by 89665a6a7140 ("PCI: Check only the Vendor ID to identify
Configuration Request Retry").

[bhelgaas: changelog]
Link: http://lkml.kernel.org/r/20071029061532.5d10dfc6@snowcone
Link: http://lkml.kernel.org/r/alpine.LFD.0.9999.0712271023090.21557@woody.linux-foundation.org
Signed-off-by: Rajat Jain <rajatxjain@gmail.com>
Signed-off-by: Rajat Jain <rajatjain@juniper.net>
Signed-off-by: Guenter Roeck <groeck@juniper.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/probe.c           | 13 +++++++++++++
 include/uapi/linux/pci_regs.h |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 9c26663c91d2..e02cdaa5bf0c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -740,6 +740,17 @@ struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 }
 EXPORT_SYMBOL(pci_add_new_bus);
 
+static void pci_enable_crs(struct pci_dev *pdev)
+{
+	u16 root_cap = 0;
+
+	/* Enable CRS Software Visibility if supported */
+	pcie_capability_read_word(pdev, PCI_EXP_RTCAP, &root_cap);
+	if (root_cap & PCI_EXP_RTCAP_CRSVIS)
+		pcie_capability_set_word(pdev, PCI_EXP_RTCTL,
+					 PCI_EXP_RTCTL_CRSSVE);
+}
+
 /*
  * If it's a bridge, configure it and scan the bus behind it.
  * For CardBus bridges, we don't scan behind as the devices will
@@ -787,6 +798,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, int pass)
 	pci_write_config_word(dev, PCI_BRIDGE_CONTROL,
 			      bctl & ~PCI_BRIDGE_CTL_MASTER_ABORT);
 
+	pci_enable_crs(dev);
+
 	if ((secondary || subordinate) && !pcibios_assign_all_busses() &&
 	    !is_cardbus && !broken) {
 		unsigned int cmax;
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 30db069bce62..57d6e56a086a 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -552,6 +552,7 @@
 #define  PCI_EXP_RTCTL_PMEIE	0x0008	/* PME Interrupt Enable */
 #define  PCI_EXP_RTCTL_CRSSVE	0x0010	/* CRS Software Visibility Enable */
 #define PCI_EXP_RTCAP		30	/* Root Capabilities */
+#define  PCI_EXP_RTCAP_CRSVIS	0x0001	/* CRS Software Visibility capability */
 #define PCI_EXP_RTSTA		32	/* Root Status */
 #define PCI_EXP_RTSTA_PME	0x00010000 /* PME status */
 #define PCI_EXP_RTSTA_PENDING	0x00020000 /* PME pending */
-- 
cgit v1.2.3


From 3045d76070abe725dbb7fd8ff39c27b820d5a7eb Mon Sep 17 00:00:00 2001
From: Ana Rey <anarey@gmail.com>
Date: Tue, 2 Sep 2014 20:36:14 +0200
Subject: netfilter: nf_tables: add devgroup support in meta expresion

Add devgroup support to let us match device group of a packets incoming
or outgoing interface.

Signed-off-by: Ana Rey <anarey@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nft_meta.c                 | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c9b6f00a3fb7..c000947d3f38 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -573,6 +573,8 @@ enum nft_exthdr_attributes {
  * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  * @NFT_META_PKTTYPE: packet type (skb->pkt_type), special handling for loopback
  * @NFT_META_CPU: cpu id through smp_processor_id()
+ * @NFT_META_IIFGROUP: packet input interface group
+ * @NFT_META_OIFGROUP: packet output interface group
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -596,6 +598,8 @@ enum nft_meta_keys {
 	NFT_META_BRI_OIFNAME,
 	NFT_META_PKTTYPE,
 	NFT_META_CPU,
+	NFT_META_IIFGROUP,
+	NFT_META_OIFGROUP,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 843e099a962d..1e7c076ca63a 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -155,6 +155,16 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 	case NFT_META_CPU:
 		dest->data[0] = smp_processor_id();
 		break;
+	case NFT_META_IIFGROUP:
+		if (in == NULL)
+			goto err;
+		dest->data[0] = in->group;
+		break;
+	case NFT_META_OIFGROUP:
+		if (out == NULL)
+			goto err;
+		dest->data[0] = out->group;
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -228,6 +238,8 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 #endif
 	case NFT_META_PKTTYPE:
 	case NFT_META_CPU:
+	case NFT_META_IIFGROUP:
+	case NFT_META_OIFGROUP:
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From e42eff8a32f8b7bde88ea3c5a56391407cbe84f3 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Thu, 4 Sep 2014 14:06:14 +0200
Subject: netfilter: nft_nat: include a flag attribute

Both SNAT and DNAT (and the upcoming masquerade) can have additional
configuration parameters, such as port randomization and NAT addressing
persistence. We can cover these scenarios by simply adding a flag
attribute for userspace to fill when needed.

The flags to use are defined in include/uapi/linux/netfilter/nf_nat.h:

 NF_NAT_RANGE_MAP_IPS
 NF_NAT_RANGE_PROTO_SPECIFIED
 NF_NAT_RANGE_PROTO_RANDOM
 NF_NAT_RANGE_PERSISTENT
 NF_NAT_RANGE_PROTO_RANDOM_FULLY
 NF_NAT_RANGE_PROTO_RANDOM_ALL

The caller must take care of not messing up with the flags, as they are
added unconditionally to the final resulting nf_nat_range.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_nat.h    |  5 +++++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_nat.c                  | 16 ++++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index 1ad3659102b6..0880781ad7b6 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -13,6 +13,11 @@
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
 
+#define NF_NAT_RANGE_MASK					\
+	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
+	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
+	 NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
 	__be32				min_ip;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c000947d3f38..6022c6e6be18 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -785,6 +785,7 @@ enum nft_nat_types {
  * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
  * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
  * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
+ * @NFTA_NAT_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
  */
 enum nft_nat_attributes {
 	NFTA_NAT_UNSPEC,
@@ -794,6 +795,7 @@ enum nft_nat_attributes {
 	NFTA_NAT_REG_ADDR_MAX,
 	NFTA_NAT_REG_PROTO_MIN,
 	NFTA_NAT_REG_PROTO_MAX,
+	NFTA_NAT_FLAGS,
 	__NFTA_NAT_MAX
 };
 #define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 79ff58cd36dc..799550b476fb 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -33,6 +33,7 @@ struct nft_nat {
 	enum nft_registers      sreg_proto_max:8;
 	enum nf_nat_manip_type  type:8;
 	u8			family;
+	u16			flags;
 };
 
 static void nft_nat_eval(const struct nft_expr *expr,
@@ -71,6 +72,8 @@ static void nft_nat_eval(const struct nft_expr *expr,
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
+	range.flags |= priv->flags;
+
 	data[NFT_REG_VERDICT].verdict =
 		nf_nat_setup_info(ct, &range, priv->type);
 }
@@ -82,6 +85,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
 	[NFTA_NAT_REG_ADDR_MAX]	 = { .type = NLA_U32 },
 	[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
 	[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+	[NFTA_NAT_FLAGS]	 = { .type = NLA_U32 },
 };
 
 static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
@@ -149,6 +153,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	} else
 		priv->sreg_proto_max = priv->sreg_proto_min;
 
+	if (tb[NFTA_NAT_FLAGS]) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+		if (priv->flags & ~NF_NAT_RANGE_MASK)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -183,6 +193,12 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
 				 htonl(priv->sreg_proto_max)))
 			goto nla_put_failure;
 	}
+
+	if (priv->flags != 0) {
+		if (nla_put_be32(skb, NFTA_NAT_FLAGS, htonl(priv->flags)))
+			goto nla_put_failure;
+	}
+
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From 9ba1f726bec090399eb9bb9157eb32dedc8e8c45 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Mon, 8 Sep 2014 13:45:00 +0200
Subject: netfilter: nf_tables: add new nft_masq expression

The nft_masq expression is intended to perform NAT in the masquerade flavour.

We decided to have the masquerade functionality in a separated expression other
than nft_nat.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_masq.h         | 16 ++++++
 include/uapi/linux/netfilter/nf_tables.h | 11 ++++
 net/ipv4/netfilter/Kconfig               |  6 +++
 net/ipv4/netfilter/Makefile              |  1 +
 net/ipv4/netfilter/nft_masq_ipv4.c       | 89 ++++++++++++++++++++++++++++++++
 net/ipv6/netfilter/Kconfig               |  6 +++
 net/ipv6/netfilter/Makefile              |  1 +
 net/ipv6/netfilter/nft_masq_ipv6.c       | 89 ++++++++++++++++++++++++++++++++
 net/netfilter/Kconfig                    |  9 ++++
 net/netfilter/Makefile                   |  1 +
 net/netfilter/nft_masq.c                 | 59 +++++++++++++++++++++
 11 files changed, 288 insertions(+)
 create mode 100644 include/net/netfilter/nft_masq.h
 create mode 100644 net/ipv4/netfilter/nft_masq_ipv4.c
 create mode 100644 net/ipv6/netfilter/nft_masq_ipv6.c
 create mode 100644 net/netfilter/nft_masq.c

(limited to 'include/uapi')

diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
new file mode 100644
index 000000000000..c72729f954f4
--- /dev/null
+++ b/include/net/netfilter/nft_masq.h
@@ -0,0 +1,16 @@
+#ifndef _NFT_MASQ_H_
+#define _NFT_MASQ_H_
+
+struct nft_masq {
+	u32	flags;
+};
+
+extern const struct nla_policy nft_masq_policy[];
+
+int nft_masq_init(const struct nft_ctx *ctx,
+		  const struct nft_expr *expr,
+		  const struct nlattr * const tb[]);
+
+int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr);
+
+#endif /* _NFT_MASQ_H_ */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 6022c6e6be18..eeec0ae845ef 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -800,4 +800,15 @@ enum nft_nat_attributes {
 };
 #define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
 
+/**
+ * enum nft_masq_attributes - nf_tables masquerade expression attributes
+ *
+ * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
+ */
+enum nft_masq_attributes {
+	NFTA_MASQ_FLAGS,
+	__NFTA_MASQ_MAX
+};
+#define NFTA_MASQ_MAX		(__NFTA_MASQ_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 4be3e541350e..8dd3d9f19d82 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -190,6 +190,12 @@ config NF_NAT_MASQUERADE_IPV4
 	This is the kernel functionality to provide NAT in the masquerade
 	flavour (automatic source address selection).
 
+config NFT_MASQ_IPV4
+	tristate "IPv4 masquerading support for nf_tables"
+	depends on NF_TABLES_IPV4
+	depends on NFT_MASQ
+	select NF_NAT_MASQUERADE_IPV4
+
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
 	select NF_NAT_MASQUERADE_IPV4
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 42056b2fd0e3..7d019aefb0ed 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
 
 # generic IP tables 
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
new file mode 100644
index 000000000000..6ea1d207b6a5
--- /dev/null
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range range;
+	unsigned int verdict;
+
+	range.flags = priv->flags;
+
+	verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
+					 &range, pkt->out);
+
+	data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static int nft_masq_ipv4_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	int err;
+
+	err = nft_masq_init(ctx, expr, tb);
+	if (err < 0)
+		return err;
+
+	nf_nat_masquerade_ipv4_register_notifier();
+	return 0;
+}
+
+static void nft_masq_ipv4_destroy(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+	.type		= &nft_masq_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv4_eval,
+	.init		= nft_masq_ipv4_init,
+	.destroy	= nft_masq_ipv4_destroy,
+	.dump		= nft_masq_dump,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv4_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_masq_ipv4_module_init(void)
+{
+	return nft_register_expr(&nft_masq_ipv4_type);
+}
+
+static void __exit nft_masq_ipv4_module_exit(void)
+{
+	nft_unregister_expr(&nft_masq_ipv4_type);
+}
+
+module_init(nft_masq_ipv4_module_init);
+module_exit(nft_masq_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6c8cfec6836a..24c535f66df0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -252,6 +252,12 @@ config NF_NAT_MASQUERADE_IPV6
 	 This is the kernel functionality to provide NAT in the masquerade
 	 flavour (automatic source address selection) for IPv6.
 
+config NFT_MASQ_IPV6
+	tristate "IPv6 masquerade support for nf_tables"
+	depends on NF_TABLES_IPV6
+	depends on NFT_MASQ
+	select NF_NAT_MASQUERADE_IPV6
+
 config IP6_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
 	select NF_NAT_MASQUERADE_IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 89a0bd751f82..482c4dff273f 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
+obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
new file mode 100644
index 000000000000..4e51334ef6b7
--- /dev/null
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range range;
+	unsigned int verdict;
+
+	range.flags = priv->flags;
+
+	verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
+
+	data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+static int nft_masq_ipv6_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	int err;
+
+	err = nft_masq_init(ctx, expr, tb);
+	if (err < 0)
+		return err;
+
+	nf_nat_masquerade_ipv6_register_notifier();
+	return 0;
+}
+
+static void nft_masq_ipv6_destroy(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	nf_nat_masquerade_ipv6_unregister_notifier();
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+	.type		= &nft_masq_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv6_eval,
+	.init		= nft_masq_ipv6_init,
+	.destroy	= nft_masq_ipv6_destroy,
+	.dump		= nft_masq_dump,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv6_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_masq_ipv6_module_init(void)
+{
+	return nft_register_expr(&nft_masq_ipv6_type);
+}
+
+static void __exit nft_masq_ipv6_module_exit(void)
+{
+	nft_unregister_expr(&nft_masq_ipv6_type);
+}
+
+module_init(nft_masq_ipv6_module_init);
+module_exit(nft_masq_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index ad751fe2e82b..37428723394f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -496,6 +496,15 @@ config NFT_LIMIT
 	  This option adds the "limit" expression that you can use to
 	  ratelimit rule matchings.
 
+config NFT_MASQ
+	depends on NF_TABLES
+	depends on NF_CONNTRACK
+	depends on NF_NAT
+	tristate "Netfilter nf_tables masquerade support"
+	help
+	  This option adds the "masquerade" expression that you can use
+	  to perform NAT in the masquerade flavour.
+
 config NFT_NAT
 	depends on NF_TABLES
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 8308624a406a..0637792f6faf 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_RBTREE)	+= nft_rbtree.o
 obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
+obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
 
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
new file mode 100644
index 000000000000..6637bab00567
--- /dev/null
+++ b/net/netfilter/nft_masq.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+
+const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+	[NFTA_MASQ_FLAGS]	= { .type = NLA_U32 },
+};
+EXPORT_SYMBOL_GPL(nft_masq_policy);
+
+int nft_masq_init(const struct nft_ctx *ctx,
+		  const struct nft_expr *expr,
+		  const struct nlattr * const tb[])
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_MASQ_FLAGS] == NULL)
+		return 0;
+
+	priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+	if (priv->flags & ~NF_NAT_RANGE_MASK)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_masq_init);
+
+int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_masq *priv = nft_expr_priv(expr);
+
+	if (priv->flags == 0)
+		return 0;
+
+	if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
-- 
cgit v1.2.3


From c559a353410939c0884e83bdb0e2420a986ac53b Mon Sep 17 00:00:00 2001
From: Robert Baldyga <r.baldyga@samsung.com>
Date: Tue, 9 Sep 2014 08:23:16 +0200
Subject: usb: gadget: f_fs: add ioctl returning ep descriptor

This patch introduces ioctl named FUNCTIONFS_ENDPOINT_DESC, which
returns endpoint descriptor to userspace. It works only if function
is active.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/function/f_fs.c  | 23 +++++++++++++++++++++++
 include/uapi/linux/usb/functionfs.h |  6 ++++++
 2 files changed, 29 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 1aad353c1f11..a345385a5abe 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1026,6 +1026,29 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code,
 		case FUNCTIONFS_ENDPOINT_REVMAP:
 			ret = epfile->ep->num;
 			break;
+		case FUNCTIONFS_ENDPOINT_DESC:
+		{
+			int desc_idx;
+			struct usb_endpoint_descriptor *desc;
+
+			switch (epfile->ffs->gadget->speed) {
+			case USB_SPEED_SUPER:
+				desc_idx = 2;
+				break;
+			case USB_SPEED_HIGH:
+				desc_idx = 1;
+				break;
+			default:
+				desc_idx = 0;
+			}
+			desc = epfile->ep->descs[desc_idx];
+
+			spin_unlock_irq(&epfile->ffs->eps_lock);
+			ret = copy_to_user((void *)value, desc, sizeof(*desc));
+			if (ret)
+				ret = -EFAULT;
+			return ret;
+		}
 		default:
 			ret = -ENOTTY;
 		}
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 6d2a16b834bf..7c7a2feb0e6e 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -275,6 +275,12 @@ struct usb_functionfs_event {
  */
 #define	FUNCTIONFS_ENDPOINT_REVMAP	_IO('g', 129)
 
+/*
+ * Returns endpoint descriptor. If function is not active returns -ENODEV.
+ */
+#define	FUNCTIONFS_ENDPOINT_DESC	_IOR('g', 130, \
+					     struct usb_endpoint_descriptor)
+
 
 
 #endif /* _UAPI__LINUX_FUNCTIONFS_H__ */
-- 
cgit v1.2.3


From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Sep 2014 22:17:18 -0700
Subject: net: filter: split filter.h and expose eBPF to user space

allow user space to generate eBPF programs

uapi/linux/bpf.h: eBPF instruction set definition

linux/filter.h: the rest

This patch only moves macro definitions, but practically it freezes existing
eBPF instruction set, though new instructions can still be added in the future.

These eBPF definitions cannot go into uapi/linux/filter.h, since the names
may conflict with existing applications.

Full eBPF ISA description is in Documentation/networking/filter.txt

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h    | 56 +---------------------------------------
 include/uapi/linux/Kbuild |  1 +
 include/uapi/linux/bpf.h  | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 55 deletions(-)
 create mode 100644 include/uapi/linux/bpf.h

(limited to 'include/uapi')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf323da77950..8f82ef3f1cdd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -10,58 +10,12 @@
 #include <linux/workqueue.h>
 #include <uapi/linux/filter.h>
 #include <asm/cacheflush.h>
+#include <uapi/linux/bpf.h>
 
 struct sk_buff;
 struct sock;
 struct seccomp_data;
 
-/* Internally used and optimized filter representation with extended
- * instruction set based on top of classic BPF.
- */
-
-/* instruction classes */
-#define BPF_ALU64	0x07	/* alu mode in double word width */
-
-/* ld/ldx fields */
-#define BPF_DW		0x18	/* double word */
-#define BPF_XADD	0xc0	/* exclusive add */
-
-/* alu/jmp fields */
-#define BPF_MOV		0xb0	/* mov reg to reg */
-#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
-
-/* change endianness of a register */
-#define BPF_END		0xd0	/* flags for endianness conversion: */
-#define BPF_TO_LE	0x00	/* convert to little-endian */
-#define BPF_TO_BE	0x08	/* convert to big-endian */
-#define BPF_FROM_LE	BPF_TO_LE
-#define BPF_FROM_BE	BPF_TO_BE
-
-#define BPF_JNE		0x50	/* jump != */
-#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
-#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
-#define BPF_CALL	0x80	/* function call */
-#define BPF_EXIT	0x90	/* function return */
-
-/* Register numbers */
-enum {
-	BPF_REG_0 = 0,
-	BPF_REG_1,
-	BPF_REG_2,
-	BPF_REG_3,
-	BPF_REG_4,
-	BPF_REG_5,
-	BPF_REG_6,
-	BPF_REG_7,
-	BPF_REG_8,
-	BPF_REG_9,
-	BPF_REG_10,
-	__MAX_BPF_REG,
-};
-
-/* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	__MAX_BPF_REG
-
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
  * calls in BPF_CALL instruction.
@@ -322,14 +276,6 @@ enum {
 #define SK_RUN_FILTER(filter, ctx) \
 	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
 
-struct bpf_insn {
-	__u8	code;		/* opcode */
-	__u8	dst_reg:4;	/* dest register */
-	__u8	src_reg:4;	/* source register */
-	__s16	off;		/* signed offset */
-	__s32	imm;		/* signed immediate constant */
-};
-
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033f8b3f..fb3f7b675229 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -67,6 +67,7 @@ header-y += bfs_fs.h
 header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
+header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
 header-y += btrfs.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
new file mode 100644
index 000000000000..479ed0b6be16
--- /dev/null
+++ b/include/uapi/linux/bpf.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64	0x07	/* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW		0x18	/* double word */
+#define BPF_XADD	0xc0	/* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV		0xb0	/* mov reg to reg */
+#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END		0xd0	/* flags for endianness conversion: */
+#define BPF_TO_LE	0x00	/* convert to little-endian */
+#define BPF_TO_BE	0x08	/* convert to big-endian */
+#define BPF_FROM_LE	BPF_TO_LE
+#define BPF_FROM_BE	BPF_TO_BE
+
+#define BPF_JNE		0x50	/* jump != */
+#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
+#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_CALL	0x80	/* function call */
+#define BPF_EXIT	0x90	/* function return */
+
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+struct bpf_insn {
+	__u8	code;		/* opcode */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
+	__s16	off;		/* signed offset */
+	__s32	imm;		/* signed immediate constant */
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3


From e5c3ea5c668033b303e7ac835d7d91da32d97958 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 5 Sep 2014 15:51:31 +0200
Subject: bridge: implement rtnl_link_ops->get_size and
 rtnl_link_ops->fill_info

Allow rtnetlink users to get bridge master info in IFLA_INFO_DATA attr
This initial part implements forward_delay, hello_time, max_age options.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 12 ++++++++++++
 net/bridge/br_netlink.c      | 25 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ff957604a721..c80f95f6ee78 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -215,6 +215,18 @@ enum in6_addr_gen_mode {
 	IN6_ADDR_GEN_MODE_NONE,
 };
 
+/* Bridge section */
+
+enum {
+	IFLA_BR_UNSPEC,
+	IFLA_BR_FORWARD_DELAY,
+	IFLA_BR_HELLO_TIME,
+	IFLA_BR_MAX_AGE,
+	__IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX	(__IFLA_BR_MAX - 1)
+
 enum {
 	BRIDGE_MODE_UNSPEC,
 	BRIDGE_MODE_HAIRPIN,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 05aeea1222a7..bcac09fe2ac8 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -484,6 +484,29 @@ static size_t br_port_get_slave_size(const struct net_device *brdev,
 	return br_port_info_size();
 }
 
+static size_t br_get_size(const struct net_device *brdev)
+{
+	return nla_total_size(sizeof(u32)) +	/* IFLA_BR_FORWARD_DELAY  */
+	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_HELLO_TIME */
+	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_MAX_AGE */
+	       0;
+}
+
+static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
+{
+	struct net_bridge *br = netdev_priv(brdev);
+	u32 forward_delay = jiffies_to_clock_t(br->forward_delay);
+	u32 hello_time = jiffies_to_clock_t(br->hello_time);
+	u32 age_time = jiffies_to_clock_t(br->max_age);
+
+	if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
+	    nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
+	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static size_t br_get_link_af_size(const struct net_device *dev)
 {
 	struct net_port_vlans *pv;
@@ -514,6 +537,8 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
 	.validate		= br_validate,
 	.newlink		= br_dev_newlink,
 	.dellink		= br_dev_delete,
+	.get_size		= br_get_size,
+	.fill_info		= br_fill_info,
 
 	.slave_maxtype		= IFLA_BRPORT_MAX,
 	.slave_policy		= br_port_policy,
-- 
cgit v1.2.3


From c858403943886a92eece9d0413aa65c48bbe6fa7 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <michel.daenzer@amd.com>
Date: Thu, 28 Aug 2014 15:56:00 +0900
Subject: drm/radeon: Add RADEON_GEM_CPU_ACCESS BO creation flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This flag is a hint that userspace expects the BO to be accessed by the
CPU. We can use that hint to prevent such BOs from ever being stored in
the CPU inaccessible part of VRAM.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 11 +++++++++--
 include/uapi/drm/radeon_drm.h          |  2 ++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index aadbd36e64b9..eef60aaf4e64 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -144,7 +144,12 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 
 	for (i = 0; i < c; ++i) {
 		rbo->placements[i].fpfn = 0;
-		rbo->placements[i].lpfn = 0;
+		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
+		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM))
+			rbo->placements[i].lpfn =
+				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		else
+			rbo->placements[i].lpfn = 0;
 	}
 
 	/*
@@ -152,7 +157,9 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (!((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
+	      (rbo->placements[i].flags & TTM_PL_FLAG_VRAM)) &&
+	    rbo->tbo.mem.size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
 			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 375b6e656c54..f755f20d2b5c 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -801,6 +801,8 @@ struct drm_radeon_gem_info {
 #define RADEON_GEM_NO_BACKING_STORE	(1 << 0)
 #define RADEON_GEM_GTT_UC		(1 << 1)
 #define RADEON_GEM_GTT_WC		(1 << 2)
+/* BO is expected to be accessed by the CPU */
+#define RADEON_GEM_CPU_ACCESS		(1 << 3)
 
 struct drm_radeon_gem_create {
 	uint64_t	size;
-- 
cgit v1.2.3


From f266f04d33e5265e2f61ffc9d2b2f97214804995 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 28 Aug 2014 10:59:05 -0400
Subject: drm/radeon: add RADEON_GEM_NO_CPU_ACCESS BO creation flag (v4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allows pinning of buffers in the non-CPU visible portion of
vram.

v2: incorporate Michel's comments.
v3: rebase on Michel's patch
v4: rebase on Michel's v2 patch

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/radeon/radeon_object.c | 1 +
 include/uapi/drm/radeon_drm.h          | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 3dbbd65336d5..8abee5fa93bd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -313,6 +313,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
 		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
 			bo->placements[i].lpfn =
 				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index f755f20d2b5c..50d0fb41a3bf 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -803,6 +803,8 @@ struct drm_radeon_gem_info {
 #define RADEON_GEM_GTT_WC		(1 << 2)
 /* BO is expected to be accessed by the CPU */
 #define RADEON_GEM_CPU_ACCESS		(1 << 3)
+/* CPU access is not expected to work for this BO */
+#define RADEON_GEM_NO_CPU_ACCESS	(1 << 4)
 
 struct drm_radeon_gem_create {
 	uint64_t	size;
-- 
cgit v1.2.3


From b01d072065b6f36550f486fe77f05b092225ba1b Mon Sep 17 00:00:00 2001
From: David Drysdale <drysdale@google.com>
Date: Tue, 9 Sep 2014 14:50:57 -0700
Subject: shm: add memfd.h to UAPI export list

The new header file memfd.h from commit 9183df25fe7b ("shm: add
memfd_create() syscall") should be exported.

Signed-off-by: David Drysdale <drysdale@google.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 24e9033f8b3f..939df6973102 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -240,6 +240,7 @@ header-y += matroxfb.h
 header-y += mdio.h
 header-y += media.h
 header-y += mei.h
+header-y += memfd.h
 header-y += mempolicy.h
 header-y += meye.h
 header-y += mic_common.h
-- 
cgit v1.2.3


From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 9 Sep 2014 22:55:35 +0300
Subject: cfg80211: add WMM traffic stream API

Add nl80211 and driver API to validate, add and delete traffic
streams with appropriate settings.

The API calls for userspace doing the action frame handshake
with the peer, and then allows only to set up the parameters
in the driver. To avoid setting up a session only to tear it
down again, the validate API is provided, but the real usage
later can still fail so userspace must be prepared for that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h    |   4 ++
 include/net/cfg80211.h       |  23 ++++++++-
 include/uapi/linux/nl80211.h |  28 +++++++++++
 net/wireless/nl80211.c       | 109 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  31 ++++++++++++
 net/wireless/trace.h         |  45 ++++++++++++++++++
 6 files changed, 239 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 61a09f0644aa..b1be39c76931 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
 
 #define IEEE80211_MAX_MESH_ID_LEN	32
 
+#define IEEE80211_FIRST_TSPEC_TSID	8
 #define IEEE80211_NUM_TIDS		16
 
+/* number of user priorities 802.11 uses */
+#define IEEE80211_NUM_UPS		8
+
 #define IEEE80211_QOS_CTL_LEN		2
 /* 1d tag mask */
 #define IEEE80211_QOS_CTL_TAG1D_MASK		0x0007
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c2c710c14a50..a13beab123dc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2318,6 +2318,17 @@ struct cfg80211_qos_map {
  * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the
  *	given interface This is used e.g. for dynamic HT 20/40 MHz channel width
  *	changes during the lifetime of the BSS.
+ *
+ * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device
+ *	with the given parameters; action frame exchange has been handled by
+ *	userspace so this just has to modify the TX path to take the TS into
+ *	account.
+ *	If the admitted time is 0 just validate the parameters to make sure
+ *	the session can be created at all; it is valid to just always return
+ *	success for that but that may result in inefficient behaviour (handshake
+ *	with the peer followed by immediate teardown when the addition is later
+ *	rejected)
+ * @del_tx_ts: remove an existing TX TS
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2558,6 +2569,12 @@ struct cfg80211_ops {
 
 	int	(*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
 				    struct cfg80211_chan_def *chandef);
+
+	int	(*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer, u8 user_prio,
+			     u16 admitted_time);
+	int	(*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev,
+			     u8 tsid, const u8 *peer);
 };
 
 /*
@@ -2604,9 +2621,13 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels.
  * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in
  *	beaconing mode (AP, IBSS, Mesh, ...).
+ * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM
+ *	TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS
+ *	command. Standard IEEE 802.11 TSPEC setup is not yet supported, it
+ *	needs to be able to handle Block-Ack agreements and other things.
  */
 enum wiphy_flags {
-	/* use hole at 0 */
+	WIPHY_FLAG_SUPPORTS_WMM_ADMISSION	= BIT(0),
 	/* use hole at 1 */
 	/* use hole at 2 */
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 29c4399e08b9..e5b8caf5e466 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -722,6 +722,22 @@
  *	QoS mapping is relevant for IP packets, it is only valid during an
  *	association. This is cleared on disassociation and AP restart.
  *
+ * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given
+ *	%NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO
+ *	and %NL80211_ATTR_ADMITTED_TIME parameters.
+ *	Note that the action frame handshake with the AP shall be handled by
+ *	userspace via the normal management RX/TX framework, this only sets
+ *	up the TX TS in the driver/device.
+ *	If the admitted time attribute is not added then the request just checks
+ *	if a subsequent setup could be successful, the intent is to use this to
+ *	avoid setting up a session with the AP when local restrictions would
+ *	make that impossible. However, the subsequent "real" setup may still
+ *	fail even if the check was successful.
+ * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID
+ *	and %NL80211_ATTR_MAC parameters. It isn't necessary to call this
+ *	before removing a station entry entirely, or before disassociating
+ *	or similar, cleanup will happen in the driver/device in this case.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -893,6 +909,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_QOS_MAP,
 
+	NL80211_CMD_ADD_TX_TS,
+	NL80211_CMD_DEL_TX_TS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1611,6 +1630,11 @@ enum nl80211_commands {
  *	drivers to indicate dynack capability. Dynack is automatically disabled
  *	setting valid value for coverage class.
  *
+ * @NL80211_ATTR_TSID: a TSID value (u8 attribute)
+ * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute)
+ * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
+ *	(per second) (u16 attribute)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1957,6 +1981,10 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_DYN_ACK,
 
+	NL80211_ATTR_TSID,
+	NL80211_ATTR_USER_PRIO,
+	NL80211_ATTR_ADMITTED_TIME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e9fbd4f4ddb0..ab7ee4893e40 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -391,6 +391,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
 	[NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG },
+	[NL80211_ATTR_TSID] = { .type = NLA_U8 },
+	[NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
+	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
 };
 
 /* policy for the key attributes */
@@ -1510,6 +1513,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
 				CMD(channel_switch, CHANNEL_SWITCH);
 			CMD(set_qos_map, SET_QOS_MAP);
+			if (rdev->wiphy.flags &
+					WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)
+				CMD(add_tx_ts, ADD_TX_TS);
 		}
 		/* add into the if now */
 #undef CMD
@@ -9390,6 +9396,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb,
 	return ret;
 }
 
+static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *peer;
+	u8 tsid, up;
+	u16 admitted_time = 0;
+	int err;
+
+	if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] ||
+	    !info->attrs[NL80211_ATTR_USER_PRIO])
+		return -EINVAL;
+
+	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+	if (tsid >= IEEE80211_NUM_TIDS)
+		return -EINVAL;
+
+	up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]);
+	if (up >= IEEE80211_NUM_UPS)
+		return -EINVAL;
+
+	/* WMM uses TIDs 0-7 even for TSPEC */
+	if (tsid < IEEE80211_FIRST_TSPEC_TSID) {
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION))
+			return -EINVAL;
+	} else {
+		/* TODO: handle 802.11 TSPEC/admission control
+		 * need more attributes for that (e.g. BA session requirement)
+		 */
+		return -EINVAL;
+	}
+
+	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) {
+		admitted_time =
+			nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]);
+		if (!admitted_time)
+			return -EINVAL;
+	}
+
+	wdev_lock(wdev);
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+		if (wdev->current_bss)
+			break;
+		err = -ENOTCONN;
+		goto out;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time);
+
+ out:
+	wdev_unlock(wdev);
+	return err;
+}
+
+static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	const u8 *peer;
+	u8 tsid;
+	int err;
+
+	if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]);
+	peer = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	wdev_lock(wdev);
+	err = rdev_del_tx_ts(rdev, dev, tsid, peer);
+	wdev_unlock(wdev);
+
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -10147,6 +10240,22 @@ static const struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_ADD_TX_TS,
+		.doit = nl80211_add_tx_ts,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_TX_TS,
+		.doit = nl80211_del_tx_ts,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 /* notification functions */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 56c2240c30ce..f6d457d6a558 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -915,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int
+rdev_add_tx_ts(struct cfg80211_registered_device *rdev,
+	       struct net_device *dev, u8 tsid, const u8 *peer,
+	       u8 user_prio, u16 admitted_time)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+			     user_prio, admitted_time);
+	if (rdev->ops->add_tx_ts)
+		ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer,
+					   user_prio, admitted_time);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
+static inline int
+rdev_del_tx_ts(struct cfg80211_registered_device *rdev,
+	       struct net_device *dev, u8 tsid, const u8 *peer)
+{
+	int ret = -EOPNOTSUPP;
+
+	trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+	if (rdev->ops->del_tx_ts)
+		ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0c524cd76c83..625a6e6d1168 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1896,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
 );
 
+TRACE_EVENT(rdev_add_tx_ts,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time),
+	TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(peer)
+		__field(u8, tsid)
+		__field(u8, user_prio)
+		__field(u16, admitted_time)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(peer, peer);
+		__entry->tsid = tsid;
+		__entry->user_prio = user_prio;
+		__entry->admitted_time = admitted_time;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+		  __entry->tsid, __entry->user_prio, __entry->admitted_time)
+);
+
+TRACE_EVENT(rdev_del_tx_ts,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 u8 tsid, const u8 *peer),
+	TP_ARGS(wiphy, netdev, tsid, peer),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		MAC_ENTRY(peer)
+		__field(u8, tsid)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		MAC_ASSIGN(peer, peer);
+		__entry->tsid = tsid;
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
-- 
cgit v1.2.3


From 18998c381b19bfc3c285361ff6200ded7444aa2c Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 10 Sep 2014 14:07:34 +0300
Subject: cfg80211: allow requesting SMPS mode on ap start

Add feature bits to indicate device support for
static-smps and dynamic-smps modes.

Add a new NL80211_ATTR_SMPS_MODE attribue to allow
configuring the smps mode to be used by the ap
(e.g. configuring to ap to dynamic smps mode will
reduce power consumption while having minor effect
on throughput)

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 33 +++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 24 ++++++++++++++++++++++++
 3 files changed, 59 insertions(+)

(limited to 'include/uapi')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2ed1f80f7eb6..a2ddcf2398fd 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -664,6 +664,7 @@ struct cfg80211_acl_data {
  * @crypto: crypto settings
  * @privacy: the BSS uses privacy
  * @auth_type: Authentication type (algorithm)
+ * @smps_mode: SMPS mode
  * @inactivity_timeout: time in seconds to determine station's inactivity.
  * @p2p_ctwindow: P2P CT Window
  * @p2p_opp_ps: P2P opportunistic PS
@@ -682,6 +683,7 @@ struct cfg80211_ap_settings {
 	struct cfg80211_crypto_settings crypto;
 	bool privacy;
 	enum nl80211_auth_type auth_type;
+	enum nl80211_smps_mode smps_mode;
 	int inactivity_timeout;
 	u8 p2p_ctwindow;
 	bool p2p_opp_ps;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e5b8caf5e466..4b28dc07bcb1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1635,6 +1635,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds
  *	(per second) (u16 attribute)
  *
+ * @NL80211_ATTR_SMPS_MODE: SMPS mode to use (ap mode). see
+ *	&enum nl80211_smps_mode.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1985,6 +1988,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_USER_PRIO,
 	NL80211_ATTR_ADMITTED_TIME,
 
+	NL80211_ATTR_SMPS_MODE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -4030,6 +4035,13 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_ACKTO_ESTIMATION: This driver supports dynamic ACK timeout
  *	estimation (dynack). %NL80211_ATTR_WIPHY_DYN_ACK flag attribute is used
  *	to enable dynack.
+ * @NL80211_FEATURE_STATIC_SMPS: Device supports static spatial
+ *	multiplexing powersave, ie. can turn off all but one chain
+ *	even on HT connections that should be using more chains.
+ * @NL80211_FEATURE_DYNAMIC_SMPS: Device supports dynamic spatial
+ *	multiplexing powersave, ie. can turn off all but one chain
+ *	and then wake the rest up as required after, for example,
+ *	rts/cts handshake.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -4056,6 +4068,8 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_QUIET				= 1 << 21,
 	NL80211_FEATURE_TX_POWER_INSERTION		= 1 << 22,
 	NL80211_FEATURE_ACKTO_ESTIMATION		= 1 << 23,
+	NL80211_FEATURE_STATIC_SMPS			= 1 << 24,
+	NL80211_FEATURE_DYNAMIC_SMPS			= 1 << 25,
 };
 
 /**
@@ -4129,6 +4143,25 @@ enum nl80211_acl_policy {
 	NL80211_ACL_POLICY_DENY_UNLESS_LISTED,
 };
 
+/**
+ * enum nl80211_smps_mode - SMPS mode
+ *
+ * Requested SMPS mode (for AP mode)
+ *
+ * @NL80211_SMPS_OFF: SMPS off (use all antennas).
+ * @NL80211_SMPS_STATIC: static SMPS (use a single antenna)
+ * @NL80211_SMPS_DYNAMIC: dynamic smps (start with a single antenna and
+ *	turn on other antennas after CTS/RTS).
+ */
+enum nl80211_smps_mode {
+	NL80211_SMPS_OFF,
+	NL80211_SMPS_STATIC,
+	NL80211_SMPS_DYNAMIC,
+
+	__NL80211_SMPS_AFTER_LAST,
+	NL80211_SMPS_MAX = __NL80211_SMPS_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_radar_event - type of radar event for DFS operation
  *
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e8114c7a37e3..4cce3e17964d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -394,6 +394,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TSID] = { .type = NLA_U8 },
 	[NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 },
 	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
+	[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
 };
 
 /* policy for the key attributes */
@@ -3345,6 +3346,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 			return PTR_ERR(params.acl);
 	}
 
+	if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
+		params.smps_mode =
+			nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
+		switch (params.smps_mode) {
+		case NL80211_SMPS_OFF:
+			break;
+		case NL80211_SMPS_STATIC:
+			if (!(rdev->wiphy.features &
+			      NL80211_FEATURE_STATIC_SMPS))
+				return -EINVAL;
+			break;
+		case NL80211_SMPS_DYNAMIC:
+			if (!(rdev->wiphy.features &
+			      NL80211_FEATURE_DYNAMIC_SMPS))
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		params.smps_mode = NL80211_SMPS_OFF;
+	}
+
 	wdev_lock(wdev);
 	err = rdev_start_ap(rdev, dev, &params);
 	if (!err) {
-- 
cgit v1.2.3


From 39e393bb4f653d38aea40190e1aa9a49062eed4d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 11 Sep 2014 11:02:39 +0200
Subject: netfilter: nf_tables: add NFTA_MASQ_UNSPEC to nft_masq_attributes

To keep this consistent with other nft_*_attributes.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index eeec0ae845ef..66d66dd3ff79 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -806,6 +806,7 @@ enum nft_nat_attributes {
  * @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
  */
 enum nft_masq_attributes {
+	NFTA_MASQ_UNSPEC,
 	NFTA_MASQ_FLAGS,
 	__NFTA_MASQ_MAX
 };
-- 
cgit v1.2.3


From e351943b081f4d9e6f692ce1a6117e8d2e71f478 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Fri, 5 Sep 2014 13:19:59 -0400
Subject: drm/vmwgfx: Fix drm.h include

The userspace drm.h include doesn't prefix the drm directory.  This can lead
to compile failures as /usr/include/drm/ isn't in the standard gcc include
paths.  Fix it to be <drm/drm.h>, which matches the rest of the driver drm
header files that get installed into /usr/include/drm.

Red Hat Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1138759

Fixes: 1d7a5cbf8f74e
Reported-by: Jeffrey Bastian <jbastian@redhat.com>
Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/drm/vmwgfx_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 4fc66f6b12ce..c472bedbe38e 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -29,7 +29,7 @@
 #define __VMWGFX_DRM_H__
 
 #ifndef __KERNEL__
-#include <drm.h>
+#include <drm/drm.h>
 #endif
 
 #define DRM_VMW_MAX_SURFACE_FACES 6
-- 
cgit v1.2.3


From 0e9871e3f79fd17c691b50a9669220c54ff084a2 Mon Sep 17 00:00:00 2001
From: Anton Danilov <littlesmilingcloud@gmail.com>
Date: Thu, 28 Aug 2014 10:11:27 +0400
Subject: netfilter: ipset: Add skbinfo extension kernel support in the ipset
 core.

Skbinfo extension provides mapping of metainformation with lookup in the ipset tables.
This patch defines the flags, the constants, the functions and the structures
for the data type independent support of the extension.
Note the firewall mark stores in the kernel structures as two 32bit values,
but transfered through netlink as one 64bit value.

Signed-off-by: Anton Danilov <littlesmilingcloud@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      | 56 ++++++++++++++++++++++++++++-
 include/uapi/linux/netfilter/ipset/ip_set.h | 12 +++++++
 net/netfilter/ipset/ip_set_core.c           | 27 +++++++++++++-
 3 files changed, 93 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 96afc29184be..b97aac5142ed 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -57,6 +57,8 @@ enum ip_set_extension {
 	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 	IPSET_EXT_BIT_COMMENT = 2,
 	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
+	IPSET_EXT_BIT_SKBINFO = 3,
+	IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
 	/* Mark set with an extension which needs to call destroy */
 	IPSET_EXT_BIT_DESTROY = 7,
 	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
 #define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 #define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_SKBINFO(s)	((s)->extensions & IPSET_EXT_SKBINFO)
 #define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
 
 /* Extension id, in size order */
 enum ip_set_ext_id {
 	IPSET_EXT_ID_COUNTER = 0,
 	IPSET_EXT_ID_TIMEOUT,
+	IPSET_EXT_ID_SKBINFO,
 	IPSET_EXT_ID_COMMENT,
 	IPSET_EXT_ID_MAX,
 };
@@ -92,6 +96,10 @@ struct ip_set_ext {
 	u64 packets;
 	u64 bytes;
 	u32 timeout;
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
 	char *comment;
 };
 
@@ -104,6 +112,13 @@ struct ip_set_comment {
 	char *str;
 };
 
+struct ip_set_skbinfo {
+	u32 skbmark;
+	u32 skbmarkmask;
+	u32 skbprio;
+	u16 skbqueue;
+};
+
 struct ip_set;
 
 #define ext_timeout(e, s)	\
@@ -112,7 +127,8 @@ struct ip_set;
 (struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
 #define ext_comment(e, s)	\
 (struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])
-
+#define ext_skbinfo(e, s)	\
+(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
 		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
 	if (SET_WITH_COMMENT(set))
 		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+	if (SET_WITH_SKBINFO(set))
+		cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
 	if (SET_WITH_FORCEADD(set))
 		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
 
@@ -304,6 +322,39 @@ ip_set_update_counter(struct ip_set_counter *counter,
 	}
 }
 
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		      const struct ip_set_ext *ext,
+		      struct ip_set_ext *mext, u32 flags)
+{
+		mext->skbmark = skbinfo->skbmark;
+		mext->skbmarkmask = skbinfo->skbmarkmask;
+		mext->skbprio = skbinfo->skbprio;
+		mext->skbqueue = skbinfo->skbqueue;
+}
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
+{
+	return nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			     cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					 skbinfo->skbmarkmask)) ||
+	       nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			     cpu_to_be32(skbinfo->skbprio)) ||
+	       nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			     cpu_to_be16(skbinfo->skbqueue));
+
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	skbinfo->skbmark = ext->skbmark;
+	skbinfo->skbmarkmask = ext->skbmarkmask;
+	skbinfo->skbprio = ext->skbprio;
+	skbinfo->skbqueue = ext->skbqueue;
+}
+
 static inline bool
 ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
 {
@@ -497,6 +548,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 	if (SET_WITH_COMMENT(set) &&
 	    ip_set_put_comment(skb, ext_comment(e, set)))
 		return -EMSGSIZE;
+	if (SET_WITH_SKBINFO(set) &&
+	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+		return -EMSGSIZE;
 	return 0;
 }
 
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 78c2f2e79920..ca03119111a2 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -115,6 +115,9 @@ enum {
 	IPSET_ATTR_BYTES,
 	IPSET_ATTR_PACKETS,
 	IPSET_ATTR_COMMENT,
+	IPSET_ATTR_SKBMARK,
+	IPSET_ATTR_SKBPRIO,
+	IPSET_ATTR_SKBQUEUE,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -147,6 +150,7 @@ enum ipset_errno {
 	IPSET_ERR_COUNTER,
 	IPSET_ERR_COMMENT,
 	IPSET_ERR_INVALID_MARKMASK,
+	IPSET_ERR_SKBINFO,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -170,6 +174,12 @@ enum ipset_cmd_flags {
 	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
 	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
 	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+	IPSET_FLAG_BIT_MAP_SKBMARK = 8,
+	IPSET_FLAG_MAP_SKBMARK = (1 << IPSET_FLAG_BIT_MAP_SKBMARK),
+	IPSET_FLAG_BIT_MAP_SKBPRIO = 9,
+	IPSET_FLAG_MAP_SKBPRIO = (1 << IPSET_FLAG_BIT_MAP_SKBPRIO),
+	IPSET_FLAG_BIT_MAP_SKBQUEUE = 10,
+	IPSET_FLAG_MAP_SKBQUEUE = (1 << IPSET_FLAG_BIT_MAP_SKBQUEUE),
 	IPSET_FLAG_CMD_MAX = 15,
 };
 
@@ -187,6 +197,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
 	IPSET_FLAG_BIT_WITH_FORCEADD = 5,
 	IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
+	IPSET_FLAG_BIT_WITH_SKBINFO = 6,
+	IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO),
 	IPSET_FLAG_CADT_MAX	= 15,
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4ca4e5ca6f57..26c795e6b57f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -337,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
 		.len	= sizeof(unsigned long),
 		.align	= __alignof__(unsigned long),
 	},
+	[IPSET_EXT_ID_SKBINFO] = {
+		.type	= IPSET_EXT_SKBINFO,
+		.flag	= IPSET_FLAG_WITH_SKBINFO,
+		.len	= sizeof(struct ip_set_skbinfo),
+		.align	= __alignof__(struct ip_set_skbinfo),
+	},
 	[IPSET_EXT_ID_COMMENT] = {
 		.type	 = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
 		.flag	 = IPSET_FLAG_WITH_COMMENT,
@@ -382,6 +388,7 @@ int
 ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 		      struct ip_set_ext *ext)
 {
+	u64 fullmark;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!(set->extensions & IPSET_EXT_TIMEOUT))
 			return -IPSET_ERR_TIMEOUT;
@@ -402,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			return -IPSET_ERR_COMMENT;
 		ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
 	}
-
+	if (tb[IPSET_ATTR_SKBMARK]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+		ext->skbmark = fullmark >> 32;
+		ext->skbmarkmask = fullmark & 0xffffffff;
+	}
+	if (tb[IPSET_ATTR_SKBPRIO]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		ext->skbprio = be32_to_cpu(nla_get_be32(
+					    tb[IPSET_ATTR_SKBPRIO]));
+	}
+	if (tb[IPSET_ATTR_SKBQUEUE]) {
+		if (!(set->extensions & IPSET_EXT_SKBINFO))
+			return -IPSET_ERR_SKBINFO;
+		ext->skbqueue = be16_to_cpu(nla_get_be16(
+					    tb[IPSET_ATTR_SKBQUEUE]));
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-- 
cgit v1.2.3


From 76cea4109ca89dea218fdc652d2e1535fd9b5fc7 Mon Sep 17 00:00:00 2001
From: Anton Danilov <littlesmilingcloud@gmail.com>
Date: Tue, 2 Sep 2014 14:21:20 +0400
Subject: netfilter: ipset: Add skbinfo extension support to SET target.

Signed-off-by: Anton Danilov <littlesmilingcloud@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/uapi/linux/netfilter/xt_set.h |  10 +++
 net/netfilter/xt_set.c                | 155 ++++++++++++++++++++++++++++++++++
 2 files changed, 165 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
index 964d3d42f874..d6a1df1f2947 100644
--- a/include/uapi/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
@@ -71,4 +71,14 @@ struct xt_set_info_match_v3 {
 	__u32 flags;
 };
 
+/* Revision 3 target */
+
+struct xt_set_info_target_v3 {
+	struct xt_set_info add_set;
+	struct xt_set_info del_set;
+	struct xt_set_info map_set;
+	__u32 flags;
+	__u32 timeout;
+};
+
 #endif /*_XT_SET_H*/
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index cb70f6ec5695..5732cd64acc0 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -366,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 #define set_target_v2_checkentry	set_target_v1_checkentry
 #define set_target_v2_destroy		set_target_v1_destroy
 
+/* Revision 3 target */
+
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_set_info_target_v3 *info = par->targinfo;
+	ADT_OPT(add_opt, par->family, info->add_set.dim,
+		info->add_set.flags, info->flags, info->timeout);
+	ADT_OPT(del_opt, par->family, info->del_set.dim,
+		info->del_set.flags, 0, UINT_MAX);
+	ADT_OPT(map_opt, par->family, info->map_set.dim,
+		info->map_set.flags, 0, UINT_MAX);
+
+	int ret;
+
+	/* Normalize to fit into jiffies */
+	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+	    add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+		add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_add(info->add_set.index, skb, par, &add_opt);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_del(info->del_set.index, skb, par, &del_opt);
+	if (info->map_set.index != IPSET_INVALID_ID) {
+		map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+						   IPSET_FLAG_MAP_SKBPRIO |
+						   IPSET_FLAG_MAP_SKBQUEUE);
+		ret = match_set(info->map_set.index, skb, par, &map_opt,
+				info->map_set.flags & IPSET_INV_MATCH);
+		if (!ret)
+			return XT_CONTINUE;
+		if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+			skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+				    ^ (map_opt.ext.skbmark);
+		if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+			skb->priority = map_opt.ext.skbprio;
+		if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+		    skb->dev &&
+		    skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
+			skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+	}
+	return XT_CONTINUE;
+}
+
+
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct xt_set_info_target_v3 *info = par->targinfo;
+	ip_set_id_t index;
+
+	if (info->add_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->add_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find add_set index %u as target\n",
+				info->add_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->del_set.index != IPSET_INVALID_ID) {
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->del_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find del_set index %u as target\n",
+				info->del_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(par->net,
+						info->add_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->map_set.index != IPSET_INVALID_ID) {
+		if (strncmp(par->table, "mangle", 7)) {
+			pr_warn("--map-set only usable from mangle table\n");
+			return -EINVAL;
+		}
+		if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+		     (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+		     !(par->hook_mask & (1 << NF_INET_FORWARD |
+					 1 << NF_INET_LOCAL_OUT |
+					 1 << NF_INET_POST_ROUTING))) {
+			pr_warn("mapping of prio or/and queue is allowed only"
+				"from OUTPUT/FORWARD/POSTROUTING chains\n");
+			return -EINVAL;
+		}
+		index = ip_set_nfnl_get_byindex(par->net,
+						info->map_set.index);
+		if (index == IPSET_INVALID_ID) {
+			pr_warn("Cannot find map_set index %u as target\n",
+				info->map_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(par->net,
+						info->add_set.index);
+			if (info->del_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(par->net,
+						info->del_set.index);
+			return -ENOENT;
+		}
+	}
+
+	if (info->add_set.dim > IPSET_DIM_MAX ||
+	    info->del_set.dim > IPSET_DIM_MAX ||
+	    info->map_set.dim > IPSET_DIM_MAX) {
+		pr_warn("Protocol error: SET target dimension "
+			"is over the limit!\n");
+		if (info->add_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(par->net, info->add_set.index);
+		if (info->del_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(par->net, info->del_set.index);
+		if (info->map_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(par->net, info->map_set.index);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct xt_set_info_target_v3 *info = par->targinfo;
+
+	if (info->add_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->add_set.index);
+	if (info->del_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->del_set.index);
+	if (info->map_set.index != IPSET_INVALID_ID)
+		ip_set_nfnl_put(par->net, info->map_set.index);
+}
+
+
 static struct xt_match set_matches[] __read_mostly = {
 	{
 		.name		= "set",
@@ -493,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = {
 		.destroy	= set_target_v2_destroy,
 		.me		= THIS_MODULE
 	},
+	/* --map-set support */
+	{
+		.name		= "SET",
+		.revision	= 3,
+		.family		= NFPROTO_IPV4,
+		.target		= set_target_v3,
+		.targetsize	= sizeof(struct xt_set_info_target_v3),
+		.checkentry	= set_target_v3_checkentry,
+		.destroy	= set_target_v3_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "SET",
+		.revision	= 3,
+		.family		= NFPROTO_IPV6,
+		.target		= set_target_v3,
+		.targetsize	= sizeof(struct xt_set_info_target_v3),
+		.checkentry	= set_target_v3_checkentry,
+		.destroy	= set_target_v3_destroy,
+		.me		= THIS_MODULE
+	},
 };
 
 static int __init xt_set_init(void)
-- 
cgit v1.2.3


From 6cff339bbd5f9eda7a5e8a521f91a88d046e6d0c Mon Sep 17 00:00:00 2001
From: Alex Gartrell <agartrell@fb.com>
Date: Tue, 9 Sep 2014 16:40:20 -0700
Subject: ipvs: Add destination address family to netlink interface

This is necessary to support heterogeneous pools.  For example, if you have
an ipv6 addressed network, you'll want to be able to forward ipv4 traffic
into it.

This patch enforces that destination address family is the same as service
family, as none of the forwarding mechanisms support anything else.

For the old setsockopt mechanism, we simply set the dest address family to
AF_INET as we do with the service.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h            |  3 +++
 include/uapi/linux/ip_vs.h     |  3 +++
 net/netfilter/ipvs/ip_vs_ctl.c | 45 +++++++++++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 624a8a54806d..b7e2b624d58e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -648,6 +648,9 @@ struct ip_vs_dest_user_kern {
 	/* thresholds for active connections */
 	u32			u_threshold;	/* upper threshold */
 	u32			l_threshold;	/* lower threshold */
+
+	/* Address family of addr */
+	u16			af;
 };
 
 
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index fbcffe8041f7..cabe95d5b461 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -384,6 +384,9 @@ enum {
 	IPVS_DEST_ATTR_PERSIST_CONNS,	/* persistent connections */
 
 	IPVS_DEST_ATTR_STATS,		/* nested attribute for dest stats */
+
+	IPVS_DEST_ATTR_ADDR_FAMILY,	/* Address family of address */
+
 	__IPVS_DEST_ATTR_MAX,
 };
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index bd2b208ba56c..594cec7ebc43 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -816,6 +816,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	dest->u_threshold = udest->u_threshold;
 	dest->l_threshold = udest->l_threshold;
 
+	dest->af = udest->af;
+
 	spin_lock_bh(&dest->dst_lock);
 	__ip_vs_dst_cache_reset(dest);
 	spin_unlock_bh(&dest->dst_lock);
@@ -846,8 +848,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
 	EnterFunction(2);
 
+	/* Temporary for consistency */
+	if (udest->af != svc->af)
+		return -EINVAL;
+
 #ifdef CONFIG_IP_VS_IPV6
-	if (svc->af == AF_INET6) {
+	if (udest->af == AF_INET6) {
 		atype = ipv6_addr_type(&udest->addr.in6);
 		if ((!(atype & IPV6_ADDR_UNICAST) ||
 			atype & IPV6_ADDR_LINKLOCAL) &&
@@ -875,12 +881,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 		u64_stats_init(&ip_vs_dest_stats->syncp);
 	}
 
-	dest->af = svc->af;
+	dest->af = udest->af;
 	dest->protocol = svc->protocol;
 	dest->vaddr = svc->addr;
 	dest->vport = svc->port;
 	dest->vfwmark = svc->fwmark;
-	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+	ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
 	dest->port = udest->port;
 
 	atomic_set(&dest->activeconns, 0);
@@ -928,7 +934,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 		return -ERANGE;
 	}
 
-	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
 
 	/* We use function that requires RCU lock */
 	rcu_read_lock();
@@ -949,7 +955,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	if (dest != NULL) {
 		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
 			      "dest->refcnt=%d, service %u/%s:%u\n",
-			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
 			      atomic_read(&dest->refcnt),
 			      dest->vfwmark,
 			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
@@ -992,7 +998,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 		return -ERANGE;
 	}
 
-	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
 
 	/* We use function that requires RCU lock */
 	rcu_read_lock();
@@ -2244,6 +2250,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 	udest->weight		= udest_compat->weight;
 	udest->u_threshold	= udest_compat->u_threshold;
 	udest->l_threshold	= udest_compat->l_threshold;
+	udest->af		= AF_INET;
 }
 
 static int
@@ -2480,6 +2487,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 			if (count >= get->num_dests)
 				break;
 
+			/* Cannot expose heterogeneous members via sockopt
+			 * interface
+			 */
+			if (dest->af != svc->af)
+				continue;
+
 			entry.addr = dest->addr.ip;
 			entry.port = dest->port;
 			entry.conn_flags = atomic_read(&dest->conn_flags);
@@ -2777,6 +2790,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
 	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
 	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
 };
 
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3032,7 +3046,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
 			atomic_read(&dest->inactconns)) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
-			atomic_read(&dest->persistconns)))
+			atomic_read(&dest->persistconns)) ||
+	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
 		goto nla_put_failure;
 	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
 		goto nla_put_failure;
@@ -3113,6 +3128,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 {
 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
 	struct nlattr *nla_addr, *nla_port;
+	struct nlattr *nla_addr_family;
 
 	/* Parse mandatory identifying destination fields first */
 	if (nla == NULL ||
@@ -3121,6 +3137,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 
 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
 	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+	nla_addr_family	= attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
 
 	if (!(nla_addr && nla_port))
 		return -EINVAL;
@@ -3130,6 +3147,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
 	udest->port = nla_get_be16(nla_port);
 
+	if (nla_addr_family)
+		udest->af = nla_get_u16(nla_addr_family);
+	else
+		udest->af = 0;
+
 	/* If a full entry was requested, check for the additional fields */
 	if (full_entry) {
 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
@@ -3357,6 +3379,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
 					    need_full_dest);
 		if (ret)
 			goto out;
+
+		/* Old protocols did not allow the user to specify address
+		 * family, so we set it to zero instead.  We also didn't
+		 * allow heterogeneous pools in the old code, so it's safe
+		 * to assume that this will have the same address family as
+		 * the service.
+		 */
+		if (udest.af == 0)
+			udest.af = svc->af;
 	}
 
 	switch (cmd) {
-- 
cgit v1.2.3


From 971427f353f3c42c8dcef62e7124440df68eb809 Mon Sep 17 00:00:00 2001
From: Andy Zhou <azhou@nicira.com>
Date: Mon, 15 Sep 2014 19:37:25 -0700
Subject: openvswitch: Add recirc and hash action.

Recirc action allows a packet to reenter openvswitch processing.
currently openvswitch lookup flow for packet received and execute
set of actions on that packet, with help of recirc action we can
process/modify the packet and recirculate it back in openvswitch
for another pass.

OVS hash action calculates 5-tupple hash and set hash in flow-key
hash. This can be used along with recirculation for distributing
packets among different ports for bond devices.
For example:
OVS bonding can use following actions:
Match on: bond flow; Action: hash, recirc(id)
Match on: recirc-id == id and hash lower bits == a;
          Action: output port_bond_a

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 include/uapi/linux/openvswitch.h |  26 +++++
 net/openvswitch/actions.c        | 203 +++++++++++++++++++++++++++++++++++++--
 net/openvswitch/datapath.c       |  11 ++-
 net/openvswitch/datapath.h       |   7 +-
 net/openvswitch/flow.c           |   7 +-
 net/openvswitch/flow.h           |   3 +
 net/openvswitch/flow_netlink.c   |  43 ++++++++-
 7 files changed, 288 insertions(+), 12 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index a794d1dd7b40..f7fc507d82ab 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -289,6 +289,9 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_TUNNEL,    /* Nested set of ovs_tunnel attributes */
 	OVS_KEY_ATTR_SCTP,      /* struct ovs_key_sctp */
 	OVS_KEY_ATTR_TCP_FLAGS,	/* be16 TCP flags. */
+	OVS_KEY_ATTR_DP_HASH,      /* u32 hash value. Value 0 indicates the hash
+				   is not computed by the datapath. */
+	OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
@@ -493,6 +496,27 @@ struct ovs_action_push_vlan {
 	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
 };
 
+/* Data path hash algorithm for computing Datapath hash.
+ *
+ * The algorithm type only specifies the fields in a flow
+ * will be used as part of the hash. Each datapath is free
+ * to use its own hash algorithm. The hash value will be
+ * opaque to the user space daemon.
+ */
+enum ovs_hash_alg {
+	OVS_HASH_ALG_L4,
+};
+
+/*
+ * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument.
+ * @hash_alg: Algorithm used to compute hash prior to recirculation.
+ * @hash_basis: basis used for computing hash.
+ */
+struct ovs_action_hash {
+	uint32_t  hash_alg;     /* One of ovs_hash_alg. */
+	uint32_t  hash_basis;
+};
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -521,6 +545,8 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
 	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
 	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
+	OVS_ACTION_ATTR_RECIRC,       /* u32 recirc_id. */
+	OVS_ACTION_ATTR_HASH,	      /* struct ovs_action_hash. */
 	__OVS_ACTION_ATTR_MAX
 };
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c31bb80c984f..6932a42e41a2 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -35,12 +35,78 @@
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
+#include "flow.h"
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
 			      const struct nlattr *attr, int len);
 
+struct deferred_action {
+	struct sk_buff *skb;
+	const struct nlattr *actions;
+
+	/* Store pkt_key clone when creating deferred action. */
+	struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 10
+struct action_fifo {
+	int head;
+	int tail;
+	/* Deferred action fifo queue storage. */
+	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+static struct action_fifo __percpu *action_fifos;
+static DEFINE_PER_CPU(int, exec_actions_level);
+
+static void action_fifo_init(struct action_fifo *fifo)
+{
+	fifo->head = 0;
+	fifo->tail = 0;
+}
+
+static bool action_fifo_is_empty(struct action_fifo *fifo)
+{
+	return (fifo->head == fifo->tail);
+}
+
+static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
+{
+	if (action_fifo_is_empty(fifo))
+		return NULL;
+
+	return &fifo->fifo[fifo->tail++];
+}
+
+static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
+{
+	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
+		return NULL;
+
+	return &fifo->fifo[fifo->head++];
+}
+
+/* Return true if fifo is not full */
+static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
+						    struct sw_flow_key *key,
+						    const struct nlattr *attr)
+{
+	struct action_fifo *fifo;
+	struct deferred_action *da;
+
+	fifo = this_cpu_ptr(action_fifos);
+	da = action_fifo_put(fifo);
+	if (da) {
+		da->skb = skb;
+		da->actions = attr;
+		da->pkt_key = *key;
+	}
+
+	return da;
+}
+
 static int make_writable(struct sk_buff *skb, int write_len)
 {
 	if (!pskb_may_pull(skb, write_len))
@@ -485,8 +551,29 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 		/* Skip the sample action when out of memory. */
 		return 0;
 
-	/* do_execute_actions() will consume the cloned skb. */
-	return do_execute_actions(dp, skb, key, a, rem);
+	if (!add_deferred_actions(skb, key, a)) {
+		if (net_ratelimit())
+			pr_warn("%s: deferred actions limit reached, dropping sample action\n",
+				ovs_dp_name(dp));
+
+		kfree_skb(skb);
+	}
+	return 0;
+}
+
+static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
+			 const struct nlattr *attr)
+{
+	struct ovs_action_hash *hash_act = nla_data(attr);
+	u32 hash = 0;
+
+	/* OVS_HASH_ALG_L4 is the only possible hash algorithm.  */
+	hash = skb_get_hash(skb);
+	hash = jhash_1word(hash, hash_act->hash_basis);
+	if (!hash)
+		hash = 0x1;
+
+	key->ovs_flow_hash = hash;
 }
 
 static int execute_set_action(struct sk_buff *skb,
@@ -535,6 +622,44 @@ static int execute_set_action(struct sk_buff *skb,
 	return err;
 }
 
+static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
+			  struct sw_flow_key *key,
+			  const struct nlattr *a, int rem)
+{
+	struct deferred_action *da;
+	int err;
+
+	err = ovs_flow_key_update(skb, key);
+	if (err)
+		return err;
+
+	if (!last_action(a, rem)) {
+		/* Recirc action is the not the last action
+		 * of the action list, need to clone the skb.
+		 */
+		skb = skb_clone(skb, GFP_ATOMIC);
+
+		/* Skip the recirc action when out of memory, but
+		 * continue on with the rest of the action list.
+		 */
+		if (!skb)
+			return 0;
+	}
+
+	da = add_deferred_actions(skb, key, NULL);
+	if (da) {
+		da->pkt_key.recirc_id = nla_get_u32(a);
+	} else {
+		kfree_skb(skb);
+
+		if (net_ratelimit())
+			pr_warn("%s: deferred action limit reached, drop recirc action\n",
+				ovs_dp_name(dp));
+	}
+
+	return 0;
+}
+
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			      struct sw_flow_key *key,
@@ -566,6 +691,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			output_userspace(dp, skb, key, a);
 			break;
 
+		case OVS_ACTION_ATTR_HASH:
+			execute_hash(skb, key, a);
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			err = push_vlan(skb, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
@@ -576,6 +705,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = pop_vlan(skb);
 			break;
 
+		case OVS_ACTION_ATTR_RECIRC:
+			err = execute_recirc(dp, skb, key, a, rem);
+			if (last_action(a, rem)) {
+				/* If this is the last action, the skb has
+				 * been consumed or freed.
+				 * Return immediately.
+				 */
+				return err;
+			}
+			break;
+
 		case OVS_ACTION_ATTR_SET:
 			err = execute_set_action(skb, nla_data(a));
 			break;
@@ -601,12 +741,63 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	return 0;
 }
 
+static void process_deferred_actions(struct datapath *dp)
+{
+	struct action_fifo *fifo = this_cpu_ptr(action_fifos);
+
+	/* Do not touch the FIFO in case there is no deferred actions. */
+	if (action_fifo_is_empty(fifo))
+		return;
+
+	/* Finishing executing all deferred actions. */
+	do {
+		struct deferred_action *da = action_fifo_get(fifo);
+		struct sk_buff *skb = da->skb;
+		struct sw_flow_key *key = &da->pkt_key;
+		const struct nlattr *actions = da->actions;
+
+		if (actions)
+			do_execute_actions(dp, skb, key, actions,
+					   nla_len(actions));
+		else
+			ovs_dp_process_packet(skb, key);
+	} while (!action_fifo_is_empty(fifo));
+
+	/* Reset FIFO for the next packet.  */
+	action_fifo_init(fifo);
+}
+
 /* Execute a list of actions against 'skb'. */
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			struct sw_flow_key *key)
 {
-	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+	int level = this_cpu_read(exec_actions_level);
+	struct sw_flow_actions *acts;
+	int err;
+
+	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+	this_cpu_inc(exec_actions_level);
+	err = do_execute_actions(dp, skb, key,
+				 acts->actions, acts->actions_len);
+
+	if (!level)
+		process_deferred_actions(dp);
+
+	this_cpu_dec(exec_actions_level);
+	return err;
+}
+
+int action_fifos_init(void)
+{
+	action_fifos = alloc_percpu(struct action_fifo);
+	if (!action_fifos)
+		return -ENOMEM;
 
-	return do_execute_actions(dp, skb, key,
-				  acts->actions, acts->actions_len);
+	return 0;
+}
+
+void action_fifos_exit(void)
+{
+	free_percpu(action_fifos);
 }
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7e0819919b8a..16cad14fa81e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -156,7 +156,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
 }
 
 /* Must be called with rcu_read_lock or ovs_mutex. */
-static const char *ovs_dp_name(const struct datapath *dp)
+const char *ovs_dp_name(const struct datapath *dp)
 {
 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 	return vport->ops->get_name(vport);
@@ -2065,10 +2065,14 @@ static int __init dp_init(void)
 
 	pr_info("Open vSwitch switching datapath\n");
 
-	err = ovs_internal_dev_rtnl_link_register();
+	err = action_fifos_init();
 	if (err)
 		goto error;
 
+	err = ovs_internal_dev_rtnl_link_register();
+	if (err)
+		goto error_action_fifos_exit;
+
 	err = ovs_flow_init();
 	if (err)
 		goto error_unreg_rtnl_link;
@@ -2101,6 +2105,8 @@ error_flow_exit:
 	ovs_flow_exit();
 error_unreg_rtnl_link:
 	ovs_internal_dev_rtnl_link_unregister();
+error_action_fifos_exit:
+	action_fifos_exit();
 error:
 	return err;
 }
@@ -2114,6 +2120,7 @@ static void dp_cleanup(void)
 	ovs_vport_exit();
 	ovs_flow_exit();
 	ovs_internal_dev_rtnl_link_unregister();
+	action_fifos_exit();
 }
 
 module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 25b0e888cb27..ac3f3df96961 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -189,13 +189,18 @@ void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
 		  const struct dp_upcall_info *);
 
+const char *ovs_dp_name(const struct datapath *dp);
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
 					 u8 cmd);
 
 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			struct sw_flow_key *);
+
 void ovs_dp_notify_wq(struct work_struct *work);
 
+int action_fifos_init(void);
+void action_fifos_exit(void);
+
 #define OVS_NLERR(fmt, ...)					\
 do {								\
 	if (net_ratelimit())					\
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index bf8442071d75..4010423f2831 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -606,6 +606,11 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 	return 0;
 }
 
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	return key_extract(skb, key);
+}
+
 int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 3869a540365c..0f5db4ec565d 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -72,6 +72,8 @@ struct sw_flow_key {
 		u32	skb_mark;	/* SKB mark. */
 		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
 	} __packed phy; /* Safe when right after 'tun_key'. */
+	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
+	u32 recirc_id;			/* Recirculation ID.  */
 	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
@@ -187,6 +189,7 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
 void ovs_flow_stats_clear(struct sw_flow *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
+int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
 int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
 			 struct sk_buff *skb, struct sw_flow_key *key);
 /* Extract key from packet coming from userspace. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 630b320fbf3e..f4c8daa73965 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -251,6 +251,8 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
 	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
 	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+	[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
+	[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
 	[OVS_KEY_ATTR_TUNNEL] = -1,
 };
 
@@ -454,6 +456,20 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 				 const struct nlattr **a, bool is_mask)
 {
+	if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
+		u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
+
+		SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
+		u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
+
+		SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
+	}
+
 	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
 		SW_FLOW_KEY_PUT(match, phy.priority,
 			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
@@ -873,6 +889,12 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	struct nlattr *nla, *encap;
 	bool is_mask = (swkey != output);
 
+	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
+		goto nla_put_failure;
+
 	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 		goto nla_put_failure;
 
@@ -1401,11 +1423,13 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 		/* Expected argument lengths, (u32)-1 for variable length. */
 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
-			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
+			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -1432,6 +1456,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 				return -EINVAL;
 			break;
 
+		case OVS_ACTION_ATTR_HASH: {
+			const struct ovs_action_hash *act_hash = nla_data(a);
+
+			switch (act_hash->hash_alg) {
+			case OVS_HASH_ALG_L4:
+				break;
+			default:
+				return  -EINVAL;
+			}
+
+			break;
+		}
 
 		case OVS_ACTION_ATTR_POP_VLAN:
 			break;
@@ -1444,6 +1480,9 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
 				return -EINVAL;
 			break;
 
+		case OVS_ACTION_ATTR_RECIRC:
+			break;
+
 		case OVS_ACTION_ATTR_SET:
 			err = validate_set(a, key, sfa, &skip_copy);
 			if (err)
-- 
cgit v1.2.3


From 1b0bf88fd8b845aef4300c7c0feca774265dd1c4 Mon Sep 17 00:00:00 2001
From: Robert Baldyga <r.baldyga@samsung.com>
Date: Tue, 9 Sep 2014 08:23:17 +0200
Subject: usb: gadget: f_fs: virtual endpoint address mapping

This patch introduces virtual endpoint address mapping. It separates
function logic form physical endpoint addresses making it more hardware
independent.

Following modifications changes user space API, so to enable them user
have to switch on the FUNCTIONFS_VIRTUAL_ADDR flag in descriptors.

Endpoints are now refered using virtual endpoint addresses chosen by
user in endpoint descpriptors. This applies to each context when endpoint
address can be used:
- when accessing endpoint files in FunctionFS filesystemi (in file name),
- in setup requests directed to specific endpoint (in wIndex field),
- in descriptors returned by FUNCTIONFS_ENDPOINT_DESC ioctl.

In endpoint file names the endpoint address number is formatted as
double-digit hexadecimal value ("ep%02x") which has few advantages -
it is easy to parse, allows to easly recognize endpoint direction basing
on its name (IN endpoint number starts with digit 8, and OUT with 0)
which can be useful for debugging purpose, and it makes easier to introduce
further features allowing to use each endpoint number in both directions
to have more endpoints available for function if hardware supports this
(for example we could have ep01 which is endpoint 1 with OUT direction,
and ep81 which is endpoint 1 with IN direction).

Physical endpoint address can be still obtained using ioctl named
FUNCTIONFS_ENDPOINT_REVMAP, but now it's not neccesary to handle
USB transactions properly.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/function/f_fs.c  | 23 +++++++++++++++++++++--
 drivers/usb/gadget/function/u_fs.h  |  2 ++
 include/uapi/linux/usb/functionfs.h |  1 +
 3 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index e4afe8df5553..4ad11e03cf54 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1557,7 +1557,10 @@ static int ffs_epfiles_create(struct ffs_data *ffs)
 		epfile->ffs = ffs;
 		mutex_init(&epfile->mutex);
 		init_waitqueue_head(&epfile->wait);
-		sprintf(epfiles->name, "ep%u",  i);
+		if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
+			sprintf(epfiles->name, "ep%02x", ffs->eps_addrmap[i]);
+		else
+			sprintf(epfiles->name, "ep%u", i);
 		if (!unlikely(ffs_sb_create_file(ffs->sb, epfiles->name, epfile,
 						 &ffs_epfile_operations,
 						 &epfile->dentry))) {
@@ -2106,10 +2109,12 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 		break;
 	case FUNCTIONFS_DESCRIPTORS_MAGIC_V2:
 		flags = get_unaligned_le32(data + 8);
+		ffs->user_flags = flags;
 		if (flags & ~(FUNCTIONFS_HAS_FS_DESC |
 			      FUNCTIONFS_HAS_HS_DESC |
 			      FUNCTIONFS_HAS_SS_DESC |
-			      FUNCTIONFS_HAS_MS_OS_DESC)) {
+			      FUNCTIONFS_HAS_MS_OS_DESC |
+			      FUNCTIONFS_VIRTUAL_ADDR)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -2466,7 +2471,13 @@ static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
 	} else {
 		struct usb_request *req;
 		struct usb_ep *ep;
+		u8 bEndpointAddress;
 
+		/*
+		 * We back up bEndpointAddress because autoconfig overwrites
+		 * it with physical endpoint address.
+		 */
+		bEndpointAddress = ds->bEndpointAddress;
 		pr_vdebug("autoconfig\n");
 		ep = usb_ep_autoconfig(func->gadget, ds);
 		if (unlikely(!ep))
@@ -2481,6 +2492,12 @@ static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
 		ffs_ep->req = req;
 		func->eps_revmap[ds->bEndpointAddress &
 				 USB_ENDPOINT_NUMBER_MASK] = idx + 1;
+		/*
+		 * If we use virtual address mapping, we restore
+		 * original bEndpointAddress value.
+		 */
+		if (func->ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
+			ds->bEndpointAddress = bEndpointAddress;
 	}
 	ffs_dump_mem(": Rewritten ep desc", ds, ds->bLength);
 
@@ -2925,6 +2942,8 @@ static int ffs_func_setup(struct usb_function *f,
 		ret = ffs_func_revmap_ep(func, le16_to_cpu(creq->wIndex));
 		if (unlikely(ret < 0))
 			return ret;
+		if (func->ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
+			ret = func->ffs->eps_addrmap[ret];
 		break;
 
 	default:
diff --git a/drivers/usb/gadget/function/u_fs.h b/drivers/usb/gadget/function/u_fs.h
index d48897e8ffeb..cd128e31f808 100644
--- a/drivers/usb/gadget/function/u_fs.h
+++ b/drivers/usb/gadget/function/u_fs.h
@@ -224,6 +224,8 @@ struct ffs_data {
 	void				*ms_os_descs_ext_prop_name_avail;
 	void				*ms_os_descs_ext_prop_data_avail;
 
+	unsigned			user_flags;
+
 	u8				eps_addrmap[15];
 
 	unsigned short			strings_count;
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 7c7a2feb0e6e..295ba299e7bd 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -19,6 +19,7 @@ enum functionfs_flags {
 	FUNCTIONFS_HAS_HS_DESC = 2,
 	FUNCTIONFS_HAS_SS_DESC = 4,
 	FUNCTIONFS_HAS_MS_OS_DESC = 8,
+	FUNCTIONFS_VIRTUAL_ADDR = 16,
 };
 
 /* Descriptor of an non-audio endpoint */
-- 
cgit v1.2.3


From d60eacb07053142bfb9b41582074a89a790a9d46 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:33 +0100
Subject: KVM: device: add simple registration mechanism for kvm_device_ops

kvm_ioctl_create_device currently has knowledge of all the device types
and their associated ops. This is fairly inflexible when adding support
for new in-kernel device emulations, so move what we currently have out
into a table, which can support dynamic registration of ops by new
drivers for virtual hardware.

Cc: Alex Williamson <Alex.Williamson@redhat.com>
Cc: Alex Graf <agraf@suse.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h | 22 +++++++++++-----
 virt/kvm/kvm_main.c      | 65 ++++++++++++++++++++++++++++--------------------
 3 files changed, 55 insertions(+), 33 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e098dce179df..b6e954742951 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1064,6 +1064,7 @@ struct kvm_device_ops {
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0695a1e3e332..60768822b140 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -943,15 +943,25 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define KVM_DEV_TYPE_FSL_MPIC_20	1
-#define KVM_DEV_TYPE_FSL_MPIC_42	2
-#define KVM_DEV_TYPE_XICS		3
-#define KVM_DEV_TYPE_VFIO		4
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
-#define KVM_DEV_TYPE_ARM_VGIC_V2	5
-#define KVM_DEV_TYPE_FLIC		6
+
+enum kvm_device_type {
+	KVM_DEV_TYPE_FSL_MPIC_20	= 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20	KVM_DEV_TYPE_FSL_MPIC_20
+	KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42	KVM_DEV_TYPE_FSL_MPIC_42
+	KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS		KVM_DEV_TYPE_XICS
+	KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO		KVM_DEV_TYPE_VFIO
+	KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
+	KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_MAX,
+};
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c338599804e0..686d783387a0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2272,44 +2272,55 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
 	return filp->private_data;
 }
 
-static int kvm_ioctl_create_device(struct kvm *kvm,
-				   struct kvm_create_device *cd)
-{
-	struct kvm_device_ops *ops = NULL;
-	struct kvm_device *dev;
-	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
-	int ret;
-
-	switch (cd->type) {
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_MPIC
-	case KVM_DEV_TYPE_FSL_MPIC_20:
-	case KVM_DEV_TYPE_FSL_MPIC_42:
-		ops = &kvm_mpic_ops;
-		break;
+	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
+	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
 #endif
+
 #ifdef CONFIG_KVM_XICS
-	case KVM_DEV_TYPE_XICS:
-		ops = &kvm_xics_ops;
-		break;
+	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
+
 #ifdef CONFIG_KVM_VFIO
-	case KVM_DEV_TYPE_VFIO:
-		ops = &kvm_vfio_ops;
-		break;
+	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
+
 #ifdef CONFIG_KVM_ARM_VGIC
-	case KVM_DEV_TYPE_ARM_VGIC_V2:
-		ops = &kvm_arm_vgic_v2_ops;
-		break;
+	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
 #endif
+
 #ifdef CONFIG_S390
-	case KVM_DEV_TYPE_FLIC:
-		ops = &kvm_flic_ops;
-		break;
+	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
 #endif
-	default:
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENOSPC;
+
+	if (kvm_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	kvm_device_ops_table[type] = ops;
+	return 0;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENODEV;
+
+	ops = kvm_device_ops_table[cd->type];
+	if (ops == NULL)
 		return -ENODEV;
-	}
 
 	if (test)
 		return 0;
-- 
cgit v1.2.3


From 84d7fce693884897c6196cc98228a2ad56ae2a9a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 4 Sep 2014 14:30:22 +0200
Subject: netfilter: nf_tables: export rule-set generation ID

This patch exposes the ruleset generation ID in three ways:

1) The new command NFT_MSG_GETGEN that exposes the 32-bits ruleset
   generation ID. This ID is incremented in every commit and it
   should be large enough to avoid wraparound problems.

2) The less significant 16-bits of the generation ID are exposed through
   the nfgenmsg->res_id header field. This allows us to quickly catch
   if the ruleset has change between two consecutive list dumps from
   different object lists (in this specific case I think the risk of
   wraparound is unlikely).

3) Userspace subscribers may receive notifications of new rule-set
   generation after every commit. This also provides an alternative
   way to monitor the generation ID. If the events are lost, the
   userspace process hits a overrun error, so it knows that it is
   working with a stale ruleset anyway.

Patrick spotted that rule-set transformations in userspace may take
quite some time. In that case, it annotates the 32-bits generation ID
before fetching the rule-set, then:

1) it compares it to what we obtain after the transformation to
   make sure it is not working with a stale rule-set and no wraparound
   has ocurred.

2) it subscribes to ruleset notifications, so it can watch for new
   generation ID.

This is complementary to the NLM_F_DUMP_INTR approach, which allows
us to detect an interference in the middle one single list dumping.
There is no way to explicitly check that an interference has occurred
between two list dumps from the kernel, since it doesn't know how
many lists the userspace client is actually going to dump.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  16 ++++
 net/netfilter/nf_tables_api.c            | 140 +++++++++++++++++++++++++------
 2 files changed, 130 insertions(+), 26 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 66d66dd3ff79..b72ccfeaf865 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -51,6 +51,8 @@ enum nft_verdicts {
  * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
  * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
  * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
+ * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
+ * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
  */
 enum nf_tables_msg_types {
 	NFT_MSG_NEWTABLE,
@@ -68,6 +70,8 @@ enum nf_tables_msg_types {
 	NFT_MSG_NEWSETELEM,
 	NFT_MSG_GETSETELEM,
 	NFT_MSG_DELSETELEM,
+	NFT_MSG_NEWGEN,
+	NFT_MSG_GETGEN,
 	NFT_MSG_MAX,
 };
 
@@ -812,4 +816,16 @@ enum nft_masq_attributes {
 };
 #define NFTA_MASQ_MAX		(__NFTA_MASQ_MAX - 1)
 
+/**
+ * enum nft_gen_attributes - nf_tables ruleset generation attributes
+ *
+ * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32)
+ */
+enum nft_gen_attributes {
+	NFTA_GEN_UNSPEC,
+	NFTA_GEN_ID,
+	__NFTA_GEN_MAX
+};
+#define NFTA_GEN_MAX		(__NFTA_GEN_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 82374601577e..a476b9962155 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -405,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
 };
 
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
-				     int event, u32 flags, int family,
-				     const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+				     u32 portid, u32 seq, int event, u32 flags,
+				     int family, const struct nft_table *table)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
@@ -420,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
+	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
 	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -448,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
-					ctx->afi->family, ctx->table);
+	err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+					event, 0, ctx->afi->family, ctx->table);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
@@ -488,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 			if (idx > s_idx)
 				memset(&cb->args[1], 0,
 				       sizeof(cb->args) - sizeof(cb->args[0]));
-			if (nf_tables_fill_table_info(skb,
+			if (nf_tables_fill_table_info(skb, net,
 						      NETLINK_CB(cb->skb).portid,
 						      cb->nlh->nlmsg_seq,
 						      NFT_MSG_NEWTABLE,
@@ -540,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
 	if (!skb2)
 		return -ENOMEM;
 
-	err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+	err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
 					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
 					family, table);
 	if (err < 0)
@@ -914,9 +914,9 @@ nla_put_failure:
 	return -ENOSPC;
 }
 
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
-				     int event, u32 flags, int family,
-				     const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+				     u32 portid, u32 seq, int event, u32 flags,
+				     int family, const struct nft_table *table,
 				     const struct nft_chain *chain)
 {
 	struct nlmsghdr *nlh;
@@ -930,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
+	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
 		goto nla_put_failure;
@@ -988,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
-					ctx->afi->family, ctx->table,
+	err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+					event, 0, ctx->afi->family, ctx->table,
 					ctx->chain);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1031,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 				if (idx > s_idx)
 					memset(&cb->args[1], 0,
 					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+				if (nf_tables_fill_chain_info(skb, net,
+							      NETLINK_CB(cb->skb).portid,
 							      cb->nlh->nlmsg_seq,
 							      NFT_MSG_NEWCHAIN,
 							      NLM_F_MULTI,
@@ -1090,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
 	if (!skb2)
 		return -ENOMEM;
 
-	err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+	err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
 					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
 					family, table, chain);
 	if (err < 0)
@@ -1647,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 				    .len = NFT_USERDATA_MAXLEN },
 };
 
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
-				    int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+				    u32 portid, u32 seq, int event,
+				    u32 flags, int family,
 				    const struct nft_table *table,
 				    const struct nft_chain *chain,
 				    const struct nft_rule *rule)
@@ -1668,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
+	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
 		goto nla_put_failure;
@@ -1724,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
-				       ctx->afi->family, ctx->table,
+	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+				       event, 0, ctx->afi->family, ctx->table,
 				       ctx->chain, rule);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1771,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 					if (idx > s_idx)
 						memset(&cb->args[1], 0,
 						       sizeof(cb->args) - sizeof(cb->args[0]));
-					if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+					if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
 								      cb->nlh->nlmsg_seq,
 								      NFT_MSG_NEWRULE,
 								      NLM_F_MULTI | NLM_F_APPEND,
@@ -1837,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
 	if (!skb2)
 		return -ENOMEM;
 
-	err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
 				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
 				       family, table, chain, rule);
 	if (err < 0)
@@ -2321,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= ctx->afi->family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
+	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
@@ -2925,7 +2927,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family = ctx.afi->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id       = 0;
+	nfmsg->res_id	    = htons(ctx.net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
 		goto nla_put_failure;
@@ -3006,7 +3008,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= ctx->afi->family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
+	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
@@ -3293,6 +3295,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
 	return err;
 }
 
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+				   u32 portid, u32 seq)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family	= AF_UNSPEC;
+	nfmsg->version		= NFNETLINK_V0;
+	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+
+	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
+	struct sk_buff *skb2;
+	int err;
+
+	if (nlmsg_report(nlh) &&
+	    !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		goto err;
+
+	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+				      nlh->nlmsg_seq);
+	if (err < 0) {
+		kfree_skb(skb2);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+			     NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+	if (err < 0) {
+		nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+				  err);
+	}
+	return err;
+}
+
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	struct net *net = sock_net(skb->sk);
+	struct sk_buff *skb2;
+	int err;
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+				      nlh->nlmsg_seq);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
 static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 	[NFT_MSG_NEWTABLE] = {
 		.call_batch	= nf_tables_newtable,
@@ -3369,6 +3452,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
+	[NFT_MSG_GETGEN] = {
+		.call		= nf_tables_getgen,
+	},
 };
 
 static void nft_chain_commit_update(struct nft_trans *trans)
@@ -3526,6 +3612,8 @@ static int nf_tables_commit(struct sk_buff *skb)
 		call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
 	}
 
+	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From d931589c01a20595d67192f075f9c84093c43c45 Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Wed, 17 Sep 2014 22:48:45 +0900
Subject: drm/exynos: remove DRM_EXYNOS_GEM_MAP_OFFSET ioctl

This interface and relevant codes aren't used anymore.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c |  3 ---
 drivers/gpu/drm/exynos/exynos_drm_gem.c | 17 -----------------
 drivers/gpu/drm/exynos/exynos_drm_gem.h |  4 ----
 include/uapi/drm/exynos_drm.h           | 18 ------------------
 4 files changed, 42 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 2103d970d6ea..d69bd9723805 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -299,9 +299,6 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
 static const struct drm_ioctl_desc exynos_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
 			DRM_UNLOCKED | DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP_OFFSET,
-			exynos_drm_gem_map_offset_ioctl, DRM_UNLOCKED |
-			DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MMAP,
 			exynos_drm_gem_mmap_ioctl, DRM_UNLOCKED | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_GET,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 15db80138382..2f3665de2d60 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -318,23 +318,6 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file_priv)
-{
-	struct drm_exynos_gem_map_off *args = data;
-
-	DRM_DEBUG_KMS("handle = 0x%x, offset = 0x%lx\n",
-			args->handle, (unsigned long)args->offset);
-
-	if (!(dev->driver->driver_features & DRIVER_GEM)) {
-		DRM_ERROR("does not support GEM.\n");
-		return -ENODEV;
-	}
-
-	return exynos_drm_gem_dumb_map_offset(file_priv, dev, args->handle,
-			&args->offset);
-}
-
 int exynos_drm_gem_mmap_buffer(struct file *filp,
 				      struct vm_area_struct *vma)
 {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 1592c0ba7de8..8e460940d118 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -111,10 +111,6 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
 					struct drm_file *filp);
 
-/* get buffer offset to map to user space. */
-int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file_priv);
-
 /*
  * mmap the physically continuous memory that a gem object contains
  * to user space.
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index d5844122ff32..67a751c74a77 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -32,20 +32,6 @@ struct drm_exynos_gem_create {
 	unsigned int handle;
 };
 
-/**
- * A structure for getting buffer offset.
- *
- * @handle: a pointer to gem object created.
- * @pad: just padding to be 64-bit aligned.
- * @offset: relatived offset value of the memory region allocated.
- *	- this value should be set by user.
- */
-struct drm_exynos_gem_map_off {
-	unsigned int handle;
-	unsigned int pad;
-	uint64_t offset;
-};
-
 /**
  * A structure for mapping buffer.
  *
@@ -316,7 +302,6 @@ struct drm_exynos_ipp_cmd_ctrl {
 };
 
 #define DRM_EXYNOS_GEM_CREATE		0x00
-#define DRM_EXYNOS_GEM_MAP_OFFSET	0x01
 #define DRM_EXYNOS_GEM_MMAP		0x02
 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
 #define DRM_EXYNOS_GEM_GET		0x04
@@ -336,9 +321,6 @@ struct drm_exynos_ipp_cmd_ctrl {
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
 
-#define DRM_IOCTL_EXYNOS_GEM_MAP_OFFSET	DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_GEM_MAP_OFFSET, struct drm_exynos_gem_map_off)
-
 #define DRM_IOCTL_EXYNOS_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_MMAP, struct drm_exynos_gem_mmap)
 
-- 
cgit v1.2.3


From 832316c704fe3d15ae6ca9a552ae80411d1bbbcd Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Thu, 18 Sep 2014 14:19:01 +0900
Subject: drm/exynos: use drm generic mmap interface

This patch removes DRM_EXYNOS_GEM_MMAP ictrl feature specific
to Exynos drm and instead uses drm generic mmap.

We had used the interface specific to Exynos drm to do mmap directly,
not to use demand paging which maps each page with physical memory
at page fault handler. We don't need the specific mmap interface
because the drm generic mmap which uses vm offset manager stuff can
also do mmap directly.

This patch makes a userspace region to be mapped with whole physical
memory region allocated by userspace request when mmap system call is
requested.

Changelog v2:
- do not set VM_IO, VM_DONTEXPEND and VM_DONTDUMP. These flags were already
  set by drm_gem_mmap
- do not include <linux/anon_inodes.h>, which isn't needed anymore.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 26 ----------
 drivers/gpu/drm/exynos/exynos_drm_drv.h |  1 -
 drivers/gpu/drm/exynos/exynos_drm_gem.c | 89 ++++++---------------------------
 drivers/gpu/drm/exynos/exynos_drm_gem.h | 10 ----
 include/uapi/drm/exynos_drm.h           | 22 --------
 5 files changed, 16 insertions(+), 132 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index d69bd9723805..513ba940bae0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 
-#include <linux/anon_inodes.h>
 #include <linux/component.h>
 
 #include <drm/exynos_drm.h>
@@ -166,10 +165,6 @@ static int exynos_drm_unload(struct drm_device *dev)
 	return 0;
 }
 
-static const struct file_operations exynos_drm_gem_fops = {
-	.mmap = exynos_drm_gem_mmap_buffer,
-};
-
 static int exynos_drm_suspend(struct drm_device *dev, pm_message_t state)
 {
 	struct drm_connector *connector;
@@ -208,7 +203,6 @@ static int exynos_drm_resume(struct drm_device *dev)
 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_exynos_file_private *file_priv;
-	struct file *anon_filp;
 	int ret;
 
 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
@@ -221,21 +215,8 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 	if (ret)
 		goto err_file_priv_free;
 
-	anon_filp = anon_inode_getfile("exynos_gem", &exynos_drm_gem_fops,
-					NULL, 0);
-	if (IS_ERR(anon_filp)) {
-		ret = PTR_ERR(anon_filp);
-		goto err_subdrv_close;
-	}
-
-	anon_filp->f_mode = FMODE_READ | FMODE_WRITE;
-	file_priv->anon_filp = anon_filp;
-
 	return ret;
 
-err_subdrv_close:
-	exynos_drm_subdrv_close(dev, file);
-
 err_file_priv_free:
 	kfree(file_priv);
 	file->driver_priv = NULL;
@@ -251,7 +232,6 @@ static void exynos_drm_preclose(struct drm_device *dev,
 static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct exynos_drm_private *private = dev->dev_private;
-	struct drm_exynos_file_private *file_priv;
 	struct drm_pending_vblank_event *v, *vt;
 	struct drm_pending_event *e, *et;
 	unsigned long flags;
@@ -277,10 +257,6 @@ static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
-	file_priv = file->driver_priv;
-	if (file_priv->anon_filp)
-		fput(file_priv->anon_filp);
-
 	kfree(file->driver_priv);
 	file->driver_priv = NULL;
 }
@@ -299,8 +275,6 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
 static const struct drm_ioctl_desc exynos_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
 			DRM_UNLOCKED | DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MMAP,
-			exynos_drm_gem_mmap_ioctl, DRM_UNLOCKED | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_GET,
 			exynos_drm_gem_get_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(EXYNOS_VIDI_CONNECTION,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 69a6fa397d75..d22e640f59a0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -240,7 +240,6 @@ struct exynos_drm_g2d_private {
 struct drm_exynos_file_private {
 	struct exynos_drm_g2d_private	*g2d_priv;
 	struct device			*ipp_dev;
-	struct file			*anon_filp;
 };
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 2f3665de2d60..0d5b9698d384 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -318,23 +318,16 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 	drm_gem_object_unreference_unlocked(obj);
 }
 
-int exynos_drm_gem_mmap_buffer(struct file *filp,
+int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem_obj *exynos_gem_obj,
 				      struct vm_area_struct *vma)
 {
-	struct drm_gem_object *obj = filp->private_data;
-	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
-	struct drm_device *drm_dev = obj->dev;
+	struct drm_device *drm_dev = exynos_gem_obj->base.dev;
 	struct exynos_drm_gem_buf *buffer;
 	unsigned long vm_size;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
-
-	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_private_data = obj;
-	vma->vm_ops = drm_dev->driver->gem_vm_ops;
-
-	update_vm_cache_attr(exynos_gem_obj, vma);
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_pgoff = 0;
 
 	vm_size = vma->vm_end - vma->vm_start;
 
@@ -356,60 +349,6 @@ int exynos_drm_gem_mmap_buffer(struct file *filp,
 		return ret;
 	}
 
-	/*
-	 * take a reference to this mapping of the object. And this reference
-	 * is unreferenced by the corresponding vm_close call.
-	 */
-	drm_gem_object_reference(obj);
-
-	drm_vm_open_locked(drm_dev, vma);
-
-	return 0;
-}
-
-int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv)
-{
-	struct drm_exynos_file_private *exynos_file_priv;
-	struct drm_exynos_gem_mmap *args = data;
-	struct drm_gem_object *obj;
-	struct file *anon_filp;
-	unsigned long addr;
-
-	if (!(dev->driver->driver_features & DRIVER_GEM)) {
-		DRM_ERROR("does not support GEM.\n");
-		return -ENODEV;
-	}
-
-	mutex_lock(&dev->struct_mutex);
-
-	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
-	if (!obj) {
-		DRM_ERROR("failed to lookup gem object.\n");
-		mutex_unlock(&dev->struct_mutex);
-		return -EINVAL;
-	}
-
-	exynos_file_priv = file_priv->driver_priv;
-	anon_filp = exynos_file_priv->anon_filp;
-	anon_filp->private_data = obj;
-
-	addr = vm_mmap(anon_filp, 0, args->size, PROT_READ | PROT_WRITE,
-			MAP_SHARED, 0);
-
-	drm_gem_object_unreference(obj);
-
-	if (IS_ERR_VALUE(addr)) {
-		mutex_unlock(&dev->struct_mutex);
-		return (int)addr;
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-
-	args->mapped = addr;
-
-	DRM_DEBUG_KMS("mapped = 0x%lx\n", (unsigned long)args->mapped);
-
 	return 0;
 }
 
@@ -693,16 +632,20 @@ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	exynos_gem_obj = to_exynos_gem_obj(obj);
 
 	ret = check_gem_flags(exynos_gem_obj->flags);
-	if (ret) {
-		drm_gem_vm_close(vma);
-		drm_gem_free_mmap_offset(obj);
-		return ret;
-	}
-
-	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_flags |= VM_MIXEDMAP;
+	if (ret)
+		goto err_close_vm;
 
 	update_vm_cache_attr(exynos_gem_obj, vma);
 
+	ret = exynos_drm_gem_mmap_buffer(exynos_gem_obj, vma);
+	if (ret)
+		goto err_close_vm;
+
+	return ret;
+
+err_close_vm:
+	drm_gem_vm_close(vma);
+	drm_gem_free_mmap_offset(obj);
+
 	return ret;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 8e460940d118..09d021bbccf5 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -111,16 +111,6 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
 					struct drm_file *filp);
 
-/*
- * mmap the physically continuous memory that a gem object contains
- * to user space.
- */
-int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-
-int exynos_drm_gem_mmap_buffer(struct file *filp,
-				      struct vm_area_struct *vma);
-
 /* map user space allocated by malloc to pages. */
 int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_priv);
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index 67a751c74a77..5575ed1598bd 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -32,24 +32,6 @@ struct drm_exynos_gem_create {
 	unsigned int handle;
 };
 
-/**
- * A structure for mapping buffer.
- *
- * @handle: a handle to gem object created.
- * @pad: just padding to be 64-bit aligned.
- * @size: memory size to be mapped.
- * @mapped: having user virtual address mmaped.
- *	- this variable would be filled by exynos gem module
- *	of kernel side with user virtual address which is allocated
- *	by do_mmap().
- */
-struct drm_exynos_gem_mmap {
-	unsigned int handle;
-	unsigned int pad;
-	uint64_t size;
-	uint64_t mapped;
-};
-
 /**
  * A structure to gem information.
  *
@@ -302,7 +284,6 @@ struct drm_exynos_ipp_cmd_ctrl {
 };
 
 #define DRM_EXYNOS_GEM_CREATE		0x00
-#define DRM_EXYNOS_GEM_MMAP		0x02
 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
 #define DRM_EXYNOS_GEM_GET		0x04
 #define DRM_EXYNOS_VIDI_CONNECTION	0x07
@@ -321,9 +302,6 @@ struct drm_exynos_ipp_cmd_ctrl {
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
 
-#define DRM_IOCTL_EXYNOS_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + \
-		DRM_EXYNOS_GEM_MMAP, struct drm_exynos_gem_mmap)
-
 #define DRM_IOCTL_EXYNOS_GEM_GET	DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_GET,	struct drm_exynos_gem_info)
 
-- 
cgit v1.2.3


From 23461551c00628c3f3fe9cf837bf53cf8f212b63 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:56 -0700
Subject: fou: Support for foo-over-udp RX path

This patch provides a receive path for foo-over-udp. This allows
direct encapsulation of IP protocols over UDP. The bound destination
port is used to map to an IP protocol, and the XFRM framework
(udp_encap_rcv) is used to receive encapsulated packets. Upon
reception, the encapsulation header is logically removed (pointer
to transport header is advanced) and the packet is reinjected into
the receive path with the IP protocol indicated by the mapping.

Netlink is used to configure FOU ports. The configuration information
includes the port number to bind to and the IP protocol corresponding
to that port.

This should support GRE/UDP
(http://tools.ietf.org/html/draft-yong-tsvwg-gre-in-udp-encap-02),
as will as the other IP tunneling protocols (IPIP, SIT).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fou.h |  32 ++++++
 net/ipv4/Kconfig         |  10 ++
 net/ipv4/Makefile        |   1 +
 net/ipv4/fou.c           | 279 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 322 insertions(+)
 create mode 100644 include/uapi/linux/fou.h
 create mode 100644 net/ipv4/fou.c

(limited to 'include/uapi')

diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
new file mode 100644
index 000000000000..e03376de453d
--- /dev/null
+++ b/include/uapi/linux/fou.h
@@ -0,0 +1,32 @@
+/* fou.h - FOU Interface */
+
+#ifndef _UAPI_LINUX_FOU_H
+#define _UAPI_LINUX_FOU_H
+
+/* NETLINK_GENERIC related info
+ */
+#define FOU_GENL_NAME		"fou"
+#define FOU_GENL_VERSION	0x1
+
+enum {
+	FOU_ATTR_UNSPEC,
+	FOU_ATTR_PORT,				/* u16 */
+	FOU_ATTR_AF,				/* u8 */
+	FOU_ATTR_IPPROTO,			/* u8 */
+
+	__FOU_ATTR_MAX,
+};
+
+#define FOU_ATTR_MAX		(__FOU_ATTR_MAX - 1)
+
+enum {
+	FOU_CMD_UNSPEC,
+	FOU_CMD_ADD,
+	FOU_CMD_DEL,
+
+	__FOU_CMD_MAX,
+};
+
+#define FOU_CMD_MAX	(__FOU_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_FOU_H */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dbc10d84161f..84f710b7472a 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -311,6 +311,16 @@ config NET_UDP_TUNNEL
 	tristate
 	default n
 
+config NET_FOU
+	tristate "IP: Foo (IP protocols) over UDP"
+	select XFRM
+	select NET_UDP_TUNNEL
+	---help---
+	  Foo over UDP allows any IP protocol to be directly encapsulated
+	  over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
+	  network mechanisms and optimizations for UDP (such as ECMP
+	  and RSS) can be leveraged to provide better service.
+
 config INET_AH
 	tristate "IP: AH transformation"
 	select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 8ee1cd4053ee..d78d404c596f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 gre-y := gre_demux.o
+obj-$(CONFIG_NET_FOU) += fou.o
 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
 obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
new file mode 100644
index 000000000000..d44f97b79418
--- /dev/null
+++ b/net/ipv4/fou.c
@@ -0,0 +1,279 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <net/genetlink.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/xfrm.h>
+#include <uapi/linux/fou.h>
+#include <uapi/linux/genetlink.h>
+
+static DEFINE_SPINLOCK(fou_lock);
+static LIST_HEAD(fou_list);
+
+struct fou {
+	struct socket *sock;
+	u8 protocol;
+	u16 port;
+	struct list_head list;
+};
+
+struct fou_cfg {
+	u8 protocol;
+	struct udp_port_cfg udp_config;
+};
+
+static inline struct fou *fou_from_sock(struct sock *sk)
+{
+	return sk->sk_user_data;
+}
+
+static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
+				      u8 protocol, size_t len)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	/* Remove 'len' bytes from the packet (UDP header and
+	 * FOU header if present), modify the protocol to the one
+	 * we found, and then call rcv_encap.
+	 */
+	iph->tot_len = htons(ntohs(iph->tot_len) - len);
+	__skb_pull(skb, len);
+	skb_postpull_rcsum(skb, udp_hdr(skb), len);
+	skb_reset_transport_header(skb);
+
+	return -protocol;
+}
+
+static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct fou *fou = fou_from_sock(sk);
+
+	if (!fou)
+		return 1;
+
+	return fou_udp_encap_recv_deliver(skb, fou->protocol,
+					  sizeof(struct udphdr));
+}
+
+static int fou_add_to_port_list(struct fou *fou)
+{
+	struct fou *fout;
+
+	spin_lock(&fou_lock);
+	list_for_each_entry(fout, &fou_list, list) {
+		if (fou->port == fout->port) {
+			spin_unlock(&fou_lock);
+			return -EALREADY;
+		}
+	}
+
+	list_add(&fou->list, &fou_list);
+	spin_unlock(&fou_lock);
+
+	return 0;
+}
+
+static void fou_release(struct fou *fou)
+{
+	struct socket *sock = fou->sock;
+	struct sock *sk = sock->sk;
+
+	udp_del_offload(&fou->udp_offloads);
+
+	list_del(&fou->list);
+
+	/* Remove hooks into tunnel socket */
+	sk->sk_user_data = NULL;
+
+	sock_release(sock);
+
+	kfree(fou);
+}
+
+static int fou_create(struct net *net, struct fou_cfg *cfg,
+		      struct socket **sockp)
+{
+	struct fou *fou = NULL;
+	int err;
+	struct socket *sock = NULL;
+	struct sock *sk;
+
+	/* Open UDP socket */
+	err = udp_sock_create(net, &cfg->udp_config, &sock);
+	if (err < 0)
+		goto error;
+
+	/* Allocate FOU port structure */
+	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
+	if (!fou) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	sk = sock->sk;
+
+	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+	fou->protocol = cfg->protocol;
+	fou->port =  cfg->udp_config.local_udp_port;
+	udp_sk(sk)->encap_rcv = fou_udp_recv;
+
+	udp_sk(sk)->encap_type = 1;
+	udp_encap_enable();
+
+	sk->sk_user_data = fou;
+	fou->sock = sock;
+
+	udp_set_convert_csum(sk, true);
+
+	sk->sk_allocation = GFP_ATOMIC;
+
+	err = fou_add_to_port_list(fou);
+	if (err)
+		goto error;
+
+	if (sockp)
+		*sockp = sock;
+
+	return 0;
+
+error:
+	kfree(fou);
+	if (sock)
+		sock_release(sock);
+
+	return err;
+}
+
+static int fou_destroy(struct net *net, struct fou_cfg *cfg)
+{
+	struct fou *fou;
+	u16 port = cfg->udp_config.local_udp_port;
+	int err = -EINVAL;
+
+	spin_lock(&fou_lock);
+	list_for_each_entry(fou, &fou_list, list) {
+		if (fou->port == port) {
+			fou_release(fou);
+			err = 0;
+			break;
+		}
+	}
+	spin_unlock(&fou_lock);
+
+	return err;
+}
+
+static struct genl_family fou_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= FOU_GENL_NAME,
+	.version	= FOU_GENL_VERSION,
+	.maxattr	= FOU_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+	[FOU_ATTR_PORT] = { .type = NLA_U16, },
+	[FOU_ATTR_AF] = { .type = NLA_U8, },
+	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+};
+
+static int parse_nl_config(struct genl_info *info,
+			   struct fou_cfg *cfg)
+{
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->udp_config.family = AF_INET;
+
+	if (info->attrs[FOU_ATTR_AF]) {
+		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);
+
+		if (family != AF_INET && family != AF_INET6)
+			return -EINVAL;
+
+		cfg->udp_config.family = family;
+	}
+
+	if (info->attrs[FOU_ATTR_PORT]) {
+		u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);
+
+		cfg->udp_config.local_udp_port = port;
+	}
+
+	if (info->attrs[FOU_ATTR_IPPROTO])
+		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
+
+	return 0;
+}
+
+static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+{
+	struct fou_cfg cfg;
+	int err;
+
+	err = parse_nl_config(info, &cfg);
+	if (err)
+		return err;
+
+	return fou_create(&init_net, &cfg, NULL);
+}
+
+static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+{
+	struct fou_cfg cfg;
+
+	parse_nl_config(info, &cfg);
+
+	return fou_destroy(&init_net, &cfg);
+}
+
+static const struct genl_ops fou_nl_ops[] = {
+	{
+		.cmd = FOU_CMD_ADD,
+		.doit = fou_nl_cmd_add_port,
+		.policy = fou_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = FOU_CMD_DEL,
+		.doit = fou_nl_cmd_rm_port,
+		.policy = fou_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+static int __init fou_init(void)
+{
+	int ret;
+
+	ret = genl_register_family_with_ops(&fou_nl_family,
+					    fou_nl_ops);
+
+	return ret;
+}
+
+static void __exit fou_fini(void)
+{
+	struct fou *fou, *next;
+
+	genl_unregister_family(&fou_nl_family);
+
+	/* Close all the FOU sockets */
+
+	spin_lock(&fou_lock);
+	list_for_each_entry_safe(fou, next, &fou_list, list)
+		fou_release(fou);
+	spin_unlock(&fou_lock);
+}
+
+module_init(fou_init);
+module_exit(fou_fini);
+MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 56328486539ddd07cbaafec7a542a2c8a3043623 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:25:58 -0700
Subject: net: Changes to ip_tunnel to support foo-over-udp encapsulation

This patch changes IP tunnel to support (secondary) encapsulation,
Foo-over-UDP. Changes include:

1) Adding tun_hlen as the tunnel header length, encap_hlen as the
   encapsulation header length, and hlen becomes the grand total
   of these.
2) Added common netlink define to support FOU encapsulation.
3) Routines to perform FOU encapsulation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h       | 19 ++++++++-
 include/uapi/linux/if_tunnel.h | 12 ++++++
 net/ipv4/ip_tunnel.c           | 91 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 120 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 8dd8cab88b87..7f538ba6e267 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -10,6 +10,7 @@
 #include <net/gro_cells.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
+#include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -31,6 +32,13 @@ struct ip_tunnel_6rd_parm {
 };
 #endif
 
+struct ip_tunnel_encap {
+	__u16			type;
+	__u16			flags;
+	__be16			sport;
+	__be16			dport;
+};
+
 struct ip_tunnel_prl_entry {
 	struct ip_tunnel_prl_entry __rcu *next;
 	__be32				addr;
@@ -56,13 +64,18 @@ struct ip_tunnel {
 	/* These four fields used only by GRE */
 	__u32		i_seqno;	/* The last seen seqno	*/
 	__u32		o_seqno;	/* The last output seqno */
-	int		hlen;		/* Precalculated header length */
+	int		tun_hlen;	/* Precalculated header length */
 	int		mlink;
 
 	struct ip_tunnel_dst __percpu *dst_cache;
 
 	struct ip_tunnel_parm parms;
 
+	int		encap_hlen;	/* Encap header length (FOU,GUE) */
+	struct ip_tunnel_encap encap;
+
+	int		hlen;		/* tun_hlen + encap_hlen */
+
 	/* for SIT */
 #ifdef CONFIG_IPV6_SIT_6RD
 	struct ip_tunnel_6rd_parm ip6rd;
@@ -114,6 +127,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+		    u8 *protocol, struct flowi4 *fl4);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
@@ -131,6 +146,8 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		      struct ip_tunnel_parm *p);
 void ip_tunnel_setup(struct net_device *dev, int net_id);
 void ip_tunnel_dst_reset_all(struct ip_tunnel *t);
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+			  struct ip_tunnel_encap *ipencap);
 
 /* Extract dsfield from inner protocol */
 static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 3bce9e9d9f7c..9fedca7d8dae 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -53,10 +53,22 @@ enum {
 	IFLA_IPTUN_6RD_RELAY_PREFIX,
 	IFLA_IPTUN_6RD_PREFIXLEN,
 	IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
+	IFLA_IPTUN_ENCAP_TYPE,
+	IFLA_IPTUN_ENCAP_FLAGS,
+	IFLA_IPTUN_ENCAP_SPORT,
+	IFLA_IPTUN_ENCAP_DPORT,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
 
+enum tunnel_encap_types {
+	TUNNEL_ENCAP_NONE,
+	TUNNEL_ENCAP_FOU,
+};
+
+#define TUNNEL_ENCAP_FLAG_CSUM		(1<<0)
+#define TUNNEL_ENCAP_FLAG_CSUM6		(1<<1)
+
 /* SIT-mode i_flags */
 #define	SIT_ISATAP	0x0001
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index afed1aac2638..e3a3dc91e49c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
+#include <net/udp.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -487,6 +488,91 @@ drop:
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
 
+static int ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+	switch (e->type) {
+	case TUNNEL_ENCAP_NONE:
+		return 0;
+	case TUNNEL_ENCAP_FOU:
+		return sizeof(struct udphdr);
+	default:
+		return -EINVAL;
+	}
+}
+
+int ip_tunnel_encap_setup(struct ip_tunnel *t,
+			  struct ip_tunnel_encap *ipencap)
+{
+	int hlen;
+
+	memset(&t->encap, 0, sizeof(t->encap));
+
+	hlen = ip_encap_hlen(ipencap);
+	if (hlen < 0)
+		return hlen;
+
+	t->encap.type = ipencap->type;
+	t->encap.sport = ipencap->sport;
+	t->encap.dport = ipencap->dport;
+	t->encap.flags = ipencap->flags;
+
+	t->encap_hlen = hlen;
+	t->hlen = t->encap_hlen + t->tun_hlen;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
+{
+	struct udphdr *uh;
+	__be16 sport;
+	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+	skb = iptunnel_handle_offloads(skb, csum, type);
+
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	/* Get length and hash before making space in skb */
+
+	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+					       skb, 0, 0, false);
+
+	skb_push(skb, hdr_len);
+
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = e->dport;
+	uh->source = sport;
+	uh->len = htons(skb->len);
+	uh->check = 0;
+	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+		     fl4->saddr, fl4->daddr, skb->len);
+
+	*protocol = IPPROTO_UDP;
+
+	return 0;
+}
+
+int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+		    u8 *protocol, struct flowi4 *fl4)
+{
+	switch (t->encap.type) {
+	case TUNNEL_ENCAP_NONE:
+		return 0;
+	case TUNNEL_ENCAP_FOU:
+		return fou_build_header(skb, &t->encap, t->encap_hlen,
+					protocol, fl4);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL(ip_tunnel_encap);
+
 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 			    struct rtable *rt, __be16 df)
 {
@@ -536,7 +622,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 }
 
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
-		    const struct iphdr *tnl_params, const u8 protocol)
+		    const struct iphdr *tnl_params, u8 protocol)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *inner_iph;
@@ -617,6 +703,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
 			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
 
+	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+		goto tx_error;
+
 	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
 
 	if (!rt) {
-- 
cgit v1.2.3


From 4565e9919cda747815547e2e5d7b78f15efbffdf Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 17 Sep 2014 12:26:01 -0700
Subject: gre: Setup and TX path for gre/UDP foo-over-udp encapsulation

Added netlink attrs to configure FOU encapsulation for GRE, netlink
handling of these flags, and properly adjust MTU for encapsulation.
ip_tunnel_encap is called from ip_tunnel_xmit to actually perform FOU
encapsulation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_tunnel.h |  4 ++
 net/ipv4/ip_gre.c              | 90 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 89 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 9fedca7d8dae..7c832afdfa94 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -106,6 +106,10 @@ enum {
 	IFLA_GRE_ENCAP_LIMIT,
 	IFLA_GRE_FLOWINFO,
 	IFLA_GRE_FLAGS,
+	IFLA_GRE_ENCAP_TYPE,
+	IFLA_GRE_ENCAP_FLAGS,
+	IFLA_GRE_ENCAP_SPORT,
+	IFLA_GRE_ENCAP_DPORT,
 	__IFLA_GRE_MAX,
 };
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9b842544aea3..829aff8bf723 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -239,7 +239,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 	tpi.seq = htonl(tunnel->o_seqno);
 
 	/* Push GRE header. */
-	gre_build_header(skb, &tpi, tunnel->hlen);
+	gre_build_header(skb, &tpi, tunnel->tun_hlen);
 
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
@@ -310,7 +310,7 @@ out:
 static int ipgre_tunnel_ioctl(struct net_device *dev,
 			      struct ifreq *ifr, int cmd)
 {
-	int err = 0;
+	int err;
 	struct ip_tunnel_parm p;
 
 	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
@@ -470,13 +470,18 @@ static void ipgre_tunnel_setup(struct net_device *dev)
 static void __gre_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel;
+	int t_hlen;
 
 	tunnel = netdev_priv(dev);
-	tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+	tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
 	tunnel->parms.iph.protocol = IPPROTO_GRE;
 
-	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
-	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
+	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+
+	t_hlen = tunnel->hlen + sizeof(struct iphdr);
+
+	dev->needed_headroom	= LL_MAX_HEADER + t_hlen + 4;
+	dev->mtu		= ETH_DATA_LEN - t_hlen - 4;
 
 	dev->features		|= GRE_FEATURES;
 	dev->hw_features	|= GRE_FEATURES;
@@ -628,6 +633,40 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
 		parms->iph.frag_off = htons(IP_DF);
 }
 
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipgre_netlink_encap_parms(struct nlattr *data[],
+				      struct ip_tunnel_encap *ipencap)
+{
+	bool ret = false;
+
+	memset(ipencap, 0, sizeof(*ipencap));
+
+	if (!data)
+		return ret;
+
+	if (data[IFLA_GRE_ENCAP_TYPE]) {
+		ret = true;
+		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+	}
+
+	if (data[IFLA_GRE_ENCAP_FLAGS]) {
+		ret = true;
+		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+	}
+
+	if (data[IFLA_GRE_ENCAP_SPORT]) {
+		ret = true;
+		ipencap->sport = nla_get_u16(data[IFLA_GRE_ENCAP_SPORT]);
+	}
+
+	if (data[IFLA_GRE_ENCAP_DPORT]) {
+		ret = true;
+		ipencap->dport = nla_get_u16(data[IFLA_GRE_ENCAP_DPORT]);
+	}
+
+	return ret;
+}
+
 static int gre_tap_init(struct net_device *dev)
 {
 	__gre_tunnel_init(dev);
@@ -657,6 +696,15 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
 	struct ip_tunnel_parm p;
+	struct ip_tunnel_encap ipencap;
+
+	if (ipgre_netlink_encap_parms(data, &ipencap)) {
+		struct ip_tunnel *t = netdev_priv(dev);
+		int err = ip_tunnel_encap_setup(t, &ipencap);
+
+		if (err < 0)
+			return err;
+	}
 
 	ipgre_netlink_parms(data, tb, &p);
 	return ip_tunnel_newlink(dev, tb, &p);
@@ -666,6 +714,15 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
 			    struct nlattr *data[])
 {
 	struct ip_tunnel_parm p;
+	struct ip_tunnel_encap ipencap;
+
+	if (ipgre_netlink_encap_parms(data, &ipencap)) {
+		struct ip_tunnel *t = netdev_priv(dev);
+		int err = ip_tunnel_encap_setup(t, &ipencap);
+
+		if (err < 0)
+			return err;
+	}
 
 	ipgre_netlink_parms(data, tb, &p);
 	return ip_tunnel_changelink(dev, tb, &p);
@@ -694,6 +751,14 @@ static size_t ipgre_get_size(const struct net_device *dev)
 		nla_total_size(1) +
 		/* IFLA_GRE_PMTUDISC */
 		nla_total_size(1) +
+		/* IFLA_GRE_ENCAP_TYPE */
+		nla_total_size(2) +
+		/* IFLA_GRE_ENCAP_FLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_ENCAP_SPORT */
+		nla_total_size(2) +
+		/* IFLA_GRE_ENCAP_DPORT */
+		nla_total_size(2) +
 		0;
 }
 
@@ -714,6 +779,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
 		       !!(p->iph.frag_off & htons(IP_DF))))
 		goto nla_put_failure;
+
+	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+			t->encap.type) ||
+	    nla_put_u16(skb, IFLA_GRE_ENCAP_SPORT,
+			t->encap.sport) ||
+	    nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
+			t->encap.dport) ||
+	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+			t->encap.dport))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -731,6 +807,10 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
+	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
+	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
+	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
+	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
 };
 
 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
-- 
cgit v1.2.3


From e3d6eb1c16ef174a8fbbdd40770f5cbace0710e4 Mon Sep 17 00:00:00 2001
From: Vincent Palatin <vpalatin@chromium.org>
Date: Wed, 3 Sep 2014 16:38:39 -0300
Subject: [media] v4l: Add camera pan/tilt speed controls

The V4L2_CID_PAN_SPEED and V4L2_CID_TILT_SPEED controls allow to move the
camera by setting its rotation speed around its axis.

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
Reviewed-by: Pawel Osciak <posciak@chromium.org>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/v4l/compat.xml   | 10 ++++++++++
 Documentation/DocBook/media/v4l/controls.xml | 21 +++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c         |  2 ++
 include/uapi/linux/v4l2-controls.h           |  2 ++
 4 files changed, 35 insertions(+)

(limited to 'include/uapi')

diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index eee6f0f4aa43..7aa7c5d41e3b 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2545,6 +2545,16 @@ fields changed from _s32 to _u32.
       </orderedlist>
     </section>
 
+    <section>
+      <title>V4L2 in Linux 3.18</title>
+      <orderedlist>
+	<listitem>
+	  <para>Added <constant>V4L2_CID_PAN_SPEED</constant> and
+ <constant>V4L2_CID_TILT_SPEED</constant> camera controls.</para>
+	</listitem>
+      </orderedlist>
+    </section>
+
     <section id="other">
       <title>Relation of V4L2 to other Linux multimedia APIs</title>
 
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index a7eb1bde8b92..e013e4bf244c 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3965,6 +3965,27 @@ by exposure, white balance or focus controls.</entry>
 	  </row>
 	  <row><entry></entry></row>
 
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_PAN_SPEED</constant>&nbsp;</entry>
+	    <entry>integer</entry>
+	  </row><row><entry spanname="descr">This control turns the
+camera horizontally at the specific speed. The unit is undefined. A
+positive value moves the camera to the right (clockwise when viewed
+from above), a negative value to the left. A value of zero stops the motion
+if one is in progress and has no effect otherwise.</entry>
+	  </row>
+	  <row><entry></entry></row>
+
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_TILT_SPEED</constant>&nbsp;</entry>
+	    <entry>integer</entry>
+	  </row><row><entry spanname="descr">This control turns the
+camera vertically at the specified speed. The unit is undefined. A
+positive value moves the camera up, a negative value down. A value of zero
+stops the motion if one is in progress and has no effect otherwise.</entry>
+	  </row>
+	  <row><entry></entry></row>
+
 	</tbody>
       </tgroup>
     </table>
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 35d1f3d5045b..86012140923f 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -796,6 +796,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_AUTO_FOCUS_STOP:		return "Auto Focus, Stop";
 	case V4L2_CID_AUTO_FOCUS_STATUS:	return "Auto Focus, Status";
 	case V4L2_CID_AUTO_FOCUS_RANGE:		return "Auto Focus, Range";
+	case V4L2_CID_PAN_SPEED:		return "Pan, Speed";
+	case V4L2_CID_TILT_SPEED:		return "Tilt, Speed";
 
 	/* FM Radio Modulator controls */
 	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 8b930210a4b9..661f119a51b8 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -746,6 +746,8 @@ enum v4l2_auto_focus_range {
 	V4L2_AUTO_FOCUS_RANGE_INFINITY		= 3,
 };
 
+#define V4L2_CID_PAN_SPEED			(V4L2_CID_CAMERA_CLASS_BASE+32)
+#define V4L2_CID_TILT_SPEED			(V4L2_CID_CAMERA_CLASS_BASE+33)
 
 /* FM Modulator class control IDs */
 
-- 
cgit v1.2.3


From fcc0d3db28922f9ba21ea6c7b23ea10ffb5d3521 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 21 Jul 2014 17:06:33 -0300
Subject: [media] v4l: Add ARGB555X and XRGB555X pixel formats

The existing RGB555X pixel format is ill-defined in respect to its alpha
bit and its meaning is driver dependent. Create new standard ARGB555X
and XRGB555X variants with clearly defined meanings and make the
existing variant deprecated.

The new pixel formats 4CC values have been selected to match the DRM
4CCs for the same in-memory formats.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../DocBook/media/v4l/pixfmt-packed-rgb.xml        | 50 ++++++++++++++++++++--
 include/uapi/linux/videodev2.h                     |  3 ++
 2 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
index 2aae8e9452a4..65a11867e0ae 100644
--- a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
@@ -237,9 +237,9 @@ for a pixel lie next to each other in memory.</para>
 	    <entry>g<subscript>4</subscript></entry>
 	    <entry>g<subscript>3</subscript></entry>
 	  </row>
-	  <row id="V4L2-PIX-FMT-RGB555X">
-	    <entry><constant>V4L2_PIX_FMT_RGB555X</constant></entry>
-	    <entry>'RGBQ'</entry>
+	  <row id="V4L2-PIX-FMT-ARGB555X">
+	    <entry><constant>V4L2_PIX_FMT_ARGB555X</constant></entry>
+	    <entry>'AR15' | (1 &lt;&lt; 31)</entry>
 	    <entry></entry>
 	    <entry>a</entry>
 	    <entry>r<subscript>4</subscript></entry>
@@ -259,6 +259,28 @@ for a pixel lie next to each other in memory.</para>
 	    <entry>b<subscript>1</subscript></entry>
 	    <entry>b<subscript>0</subscript></entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-XRGB555X">
+	    <entry><constant>V4L2_PIX_FMT_XRGB555X</constant></entry>
+	    <entry>'XR15' | (1 &lt;&lt; 31)</entry>
+	    <entry></entry>
+	    <entry>-</entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	  </row>
 	  <row id="V4L2-PIX-FMT-RGB565X">
 	    <entry><constant>V4L2_PIX_FMT_RGB565X</constant></entry>
 	    <entry>'RGBR'</entry>
@@ -800,6 +822,28 @@ image</title>
 	    <entry>g<subscript>4</subscript></entry>
 	    <entry>g<subscript>3</subscript></entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-RGB555X">
+	    <entry><constant>V4L2_PIX_FMT_RGB555X</constant></entry>
+	    <entry>'RGBQ'</entry>
+	    <entry></entry>
+	    <entry>a</entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	  </row>
 	  <row id="V4L2-PIX-FMT-BGR32">
 	    <entry><constant>V4L2_PIX_FMT_BGR32</constant></entry>
 	    <entry>'BGR4'</entry>
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 0b1ba5c6a8d2..1c2f84fd4d99 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -79,6 +79,7 @@
 /*  Four-character-code (FOURCC) */
 #define v4l2_fourcc(a, b, c, d)\
 	((__u32)(a) | ((__u32)(b) << 8) | ((__u32)(c) << 16) | ((__u32)(d) << 24))
+#define v4l2_fourcc_be(a, b, c, d)	(v4l2_fourcc(a, b, c, d) | (1 << 31))
 
 /*
  *	E N U M S
@@ -307,6 +308,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_XRGB555 v4l2_fourcc('X', 'R', '1', '5') /* 16  XRGB-1-5-5-5  */
 #define V4L2_PIX_FMT_RGB565  v4l2_fourcc('R', 'G', 'B', 'P') /* 16  RGB-5-6-5     */
 #define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R', 'G', 'B', 'Q') /* 16  RGB-5-5-5 BE  */
+#define V4L2_PIX_FMT_ARGB555X v4l2_fourcc_be('A', 'R', '1', '5') /* 16  ARGB-5-5-5 BE */
+#define V4L2_PIX_FMT_XRGB555X v4l2_fourcc_be('X', 'R', '1', '5') /* 16  XRGB-5-5-5 BE */
 #define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16  RGB-5-6-5 BE  */
 #define V4L2_PIX_FMT_BGR666  v4l2_fourcc('B', 'G', 'R', 'H') /* 18  BGR-6-6-6	  */
 #define V4L2_PIX_FMT_BGR24   v4l2_fourcc('B', 'G', 'R', '3') /* 24  BGR-8-8-8     */
-- 
cgit v1.2.3


From 7e51aa4486bcf72daeb5d30227c4c01563f37044 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 2 Apr 2014 16:00:58 -0400
Subject: audit: drop unused struct audit_rule definition

The kernel only uses struct audit_rule_data.  We dropped support for
struct audit_rule a long time ago.  Drop the definition in the header
file.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/uapi/linux/audit.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index cf6714752b69..df71b1d2cbb8 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -444,17 +444,4 @@ struct audit_rule_data {
 	char		buf[0];	/* string fields buffer */
 };
 
-/* audit_rule is supported to maintain backward compatibility with
- * userspace.  It supports integer fields only and corresponds to
- * AUDIT_ADD, AUDIT_DEL and AUDIT_LIST requests.
- */
-struct audit_rule {		/* for AUDIT_LIST, AUDIT_ADD, and AUDIT_DEL */
-	__u32		flags;	/* AUDIT_PER_{TASK,CALL}, AUDIT_PREPEND */
-	__u32		action;	/* AUDIT_NEVER, AUDIT_POSSIBLE, AUDIT_ALWAYS */
-	__u32		field_count;
-	__u32		mask[AUDIT_BITMASK_SIZE];
-	__u32		fields[AUDIT_MAX_FIELDS];
-	__u32		values[AUDIT_MAX_FIELDS];
-};
-
 #endif /* _UAPI_LINUX_AUDIT_H_ */
-- 
cgit v1.2.3


From ce5d112827e5c2e9864323d0efd7ec2a62c6dce0 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 11 Mar 2014 13:50:46 -0400
Subject: ARCH: AUDIT: implement syscall_get_arch for all arches

For all arches which support audit implement syscall_get_arch()
They are all pretty easy and straight forward, stolen from how the call
to audit_syscall_entry() determines the arch.

Based-on-patch-by: Richard Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
Cc: linux-ia64@vger.kernel.org
Cc: microblaze-uclinux@itee.uq.edu.au
Cc: linux-mips@linux-mips.org
Cc: linux@lists.openrisc.net
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: sparclinux@vger.kernel.org
---
 arch/ia64/include/asm/syscall.h       |  6 ++++++
 arch/microblaze/include/asm/syscall.h |  5 +++++
 arch/mips/include/asm/syscall.h       |  2 +-
 arch/openrisc/include/asm/syscall.h   |  5 +++++
 arch/parisc/include/asm/syscall.h     | 11 +++++++++++
 arch/powerpc/include/asm/syscall.h    | 12 ++++++++++++
 arch/sparc/include/asm/syscall.h      |  8 ++++++++
 include/uapi/linux/audit.h            |  1 +
 8 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index a7ff1c6ab068..1d0b875fec44 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -13,6 +13,7 @@
 #ifndef _ASM_SYSCALL_H
 #define _ASM_SYSCALL_H	1
 
+#include <uapi/linux/audit.h>
 #include <linux/sched.h>
 #include <linux/err.h>
 
@@ -79,4 +80,9 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 	ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
 }
+
+static inline int syscall_get_arch(void)
+{
+	return AUDIT_ARCH_IA64;
+}
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 9bc431783105..53cfaf34c343 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_MICROBLAZE_SYSCALL_H
 #define __ASM_MICROBLAZE_SYSCALL_H
 
+#include <uapi/linux/audit.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <asm/ptrace.h>
@@ -99,4 +100,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
 
+static inline int syscall_get_arch(void)
+{
+	return AUDIT_ARCH_MICROBLAZE;
+}
 #endif /* __ASM_MICROBLAZE_SYSCALL_H */
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 17960fe7a8ce..93b3b86c293c 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -129,7 +129,7 @@ extern const unsigned long sysn32_call_table[];
 
 static inline int syscall_get_arch(void)
 {
-	int arch = EM_MIPS;
+	int arch = AUDIT_ARCH_MIPS;
 #ifdef CONFIG_64BIT
 	if (!test_thread_flag(TIF_32BIT_REGS))
 		arch |= __AUDIT_ARCH_64BIT;
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index b752bb67891d..2db9f1cf0694 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -19,6 +19,7 @@
 #ifndef __ASM_OPENRISC_SYSCALL_H__
 #define __ASM_OPENRISC_SYSCALL_H__
 
+#include <uapi/linux/audit.h>
 #include <linux/err.h>
 #include <linux/sched.h>
 
@@ -71,4 +72,8 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
 	memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
 }
 
+static inline int syscall_get_arch(void)
+{
+	return AUDIT_ARCH_OPENRISC;
+}
 #endif
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 8bdfd2c8c39f..a5eba95d87fe 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -3,6 +3,8 @@
 #ifndef _ASM_PARISC_SYSCALL_H_
 #define _ASM_PARISC_SYSCALL_H_
 
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
 #include <linux/err.h>
 #include <asm/ptrace.h>
 
@@ -37,4 +39,13 @@ static inline void syscall_get_arguments(struct task_struct *tsk,
 	}
 }
 
+static inline int syscall_get_arch(void)
+{
+	int arch = AUDIT_ARCH_PARISC;
+#ifdef CONFIG_64BIT
+	if (!is_compat_task())
+		arch = AUDIT_ARCH_PARISC64;
+#endif
+	return arch;
+}
 #endif /*_ASM_PARISC_SYSCALL_H_*/
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index b54b2add07be..427154444f6d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -13,6 +13,8 @@
 #ifndef _ASM_SYSCALL_H
 #define _ASM_SYSCALL_H	1
 
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
 #include <linux/sched.h>
 
 /* ftrace syscalls requires exporting the sys_call_table */
@@ -86,4 +88,14 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
 }
 
+static inline int syscall_get_arch(void)
+{
+	int arch = AUDIT_ARCH_PPC;
+
+#ifdef CONFIG_PPC64
+	if (!is_32bit_task())
+		arch = AUDIT_ARCH_PPC64;
+#endif
+	return arch;
+}
 #endif	/* _ASM_SYSCALL_H */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 025a02ad2e31..fed3d511b108 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -1,9 +1,11 @@
 #ifndef __ASM_SPARC_SYSCALL_H
 #define __ASM_SPARC_SYSCALL_H
 
+#include <uapi/linux/audit.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <asm/ptrace.h>
+#include <asm/thread_info.h>
 
 /*
  * The syscall table always contains 32 bit pointers since we know that the
@@ -124,4 +126,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
 		regs->u_regs[UREG_I0 + i + j] = args[j];
 }
 
+static inline int syscall_get_arch(void)
+{
+	return test_thread_flag(TIF_32BIT) ? AUDIT_ARCH_SPARC
+					   : AUDIT_ARCH_SPARC64;
+}
+
 #endif /* __ASM_SPARC_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index df71b1d2cbb8..4d100c841c80 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -351,6 +351,7 @@ enum {
 #define AUDIT_ARCH_IA64		(EM_IA_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_M32R		(EM_M32R)
 #define AUDIT_ARCH_M68K		(EM_68K)
+#define AUDIT_ARCH_MICROBLAZE	(EM_MICROBLAZE)
 #define AUDIT_ARCH_MIPS		(EM_MIPS)
 #define AUDIT_ARCH_MIPSEL	(EM_MIPS|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_MIPS64	(EM_MIPS|__AUDIT_ARCH_64BIT)
-- 
cgit v1.2.3


From 26d8f6f15112b8b0fbff360c360e8c42bf2bc370 Mon Sep 17 00:00:00 2001
From: Frank Haverkamp <haver@linux.vnet.ibm.com>
Date: Wed, 10 Sep 2014 16:37:48 +0200
Subject: GenWQE: Update author information

Updated email address of co-author.

Signed-off-by: Frank Haverkamp <haver@linux.vnet.ibm.com>
Signed-off-by: Michael Jung <mijung@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/genwqe/card_base.c         | 4 ++--
 drivers/misc/genwqe/card_base.h         | 2 +-
 drivers/misc/genwqe/card_ddcb.c         | 2 +-
 drivers/misc/genwqe/card_ddcb.h         | 2 +-
 drivers/misc/genwqe/card_debugfs.c      | 2 +-
 drivers/misc/genwqe/card_dev.c          | 2 +-
 drivers/misc/genwqe/card_sysfs.c        | 2 +-
 drivers/misc/genwqe/card_utils.c        | 2 +-
 drivers/misc/genwqe/genwqe_driver.h     | 2 +-
 include/uapi/linux/genwqe/genwqe_card.h | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index c60ad4bd7ed6..12926c8c0609 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -45,7 +45,7 @@
 MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
 MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
-MODULE_AUTHOR("Michal Jung <mijung@de.ibm.com>");
+MODULE_AUTHOR("Michael Jung <mijung@gmx.net>");
 
 MODULE_DESCRIPTION("GenWQE Card");
 MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index 37657d6228de..b622d204b72b 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -8,7 +8,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 1102e1ef8104..75ff67485fdf 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
index c4f26720753e..0361a68d79a6 100644
--- a/drivers/misc/genwqe/card_ddcb.h
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -8,7 +8,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
index 170a7915497c..40b425d35866 100644
--- a/drivers/misc/genwqe/card_debugfs.c
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index aae42555e2ca..80fdde2873de 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
index 1d1c6069ebd7..2c33fbca9225 100644
--- a/drivers/misc/genwqe/card_sysfs.c
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index a6400f09229c..349b342c369b 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -5,7 +5,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h
index e39d81fd93c4..15355350e076 100644
--- a/drivers/misc/genwqe/genwqe_driver.h
+++ b/drivers/misc/genwqe/genwqe_driver.h
@@ -8,7 +8,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/uapi/linux/genwqe/genwqe_card.h b/include/uapi/linux/genwqe/genwqe_card.h
index 4fc065f29255..baa93fb4cd4f 100644
--- a/include/uapi/linux/genwqe/genwqe_card.h
+++ b/include/uapi/linux/genwqe/genwqe_card.h
@@ -8,7 +8,7 @@
  *
  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
- * Author: Michael Jung <mijung@de.ibm.com>
+ * Author: Michael Jung <mijung@gmx.net>
  * Author: Michael Ruettger <michael@ibmra.de>
  *
  * This program is free software; you can redistribute it and/or modify
-- 
cgit v1.2.3


From bc5a5b02331a3175a5fca20a4beba249e573b672 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 2 Sep 2014 19:21:47 -0700
Subject: Drivers: hv: util: Properly pack the data for file copy functionality

Properly pack the data for file copy functionality. Patch based on
investigation done by Matej Muzila <mmuzila@redhat.com>

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reported-by: <qge@redhat.com>
Cc: <stable@vger.kernel.org>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/hyperv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 78e4a86030dd..0a8e6badb29b 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -137,7 +137,7 @@ struct hv_do_fcopy {
 	__u64	offset;
 	__u32	size;
 	__u8	data[DATA_FRAGMENT];
-};
+} __attribute__((packed));
 
 /*
  * An implementation of HyperV key value pair (KVP) functionality for Linux.
-- 
cgit v1.2.3


From 846fc70986a65563a19ae86928c3acf34f12296d Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Wed, 13 Aug 2014 02:22:40 -0400
Subject: PCI/AER: Rename PCI_ERR_UNC_TRAIN to PCI_ERR_UNC_UND

In PCIe r1.0, sec 5.10.2, bit 0 of the Uncorrectable Error Status, Mask,
and Severity Registers was for "Training Error." In PCIe r1.1, sec 7.10.2,
bit 0 was redefined to be "Undefined."

Rename PCI_ERR_UNC_TRAIN to PCI_ERR_UNC_UND to reflect this change.

No functional change.

[bhelgaas: changelog]
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/vfio/pci/vfio_pci_config.c | 2 +-
 include/ras/ras_event.h            | 2 +-
 include/uapi/linux/pci_regs.h      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index e50790e91f76..1de3f94aa7de 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -727,7 +727,7 @@ static int __init init_pci_ext_cap_err_perm(struct perm_bits *perm)
 	p_setd(perm, 0, ALL_VIRT, NO_WRITE);
 
 	/* Writable bits mask */
-	mask =	PCI_ERR_UNC_TRAIN |		/* Training */
+	mask =	PCI_ERR_UNC_UND |		/* Undefined */
 		PCI_ERR_UNC_DLP |		/* Data Link Protocol */
 		PCI_ERR_UNC_SURPDN |		/* Surprise Down */
 		PCI_ERR_UNC_POISON_TLP |	/* Poisoned TLP */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 0f04a9755d1e..79abb9c71772 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -185,7 +185,7 @@ TRACE_EVENT(mc_event,
 	{PCI_ERR_COR_LOG_OVER,	"Header Log Overflow"}
 
 #define aer_uncorrectable_errors				\
-	{PCI_ERR_UNC_TRAIN,	"Undefined"},			\
+	{PCI_ERR_UNC_UND,	"Undefined"},			\
 	{PCI_ERR_UNC_DLP,	"Data Link Protocol Error"},	\
 	{PCI_ERR_UNC_SURPDN,	"Surprise Down Error"},		\
 	{PCI_ERR_UNC_POISON_TLP,"Poisoned TLP"},		\
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 30db069bce62..99e3182f2c96 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -630,7 +630,7 @@
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
-#define  PCI_ERR_UNC_TRAIN	0x00000001	/* Training */
+#define  PCI_ERR_UNC_UND	0x00000001	/* Undefined */
 #define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
 #define  PCI_ERR_UNC_SURPDN	0x00000020	/* Surprise Down */
 #define  PCI_ERR_UNC_POISON_TLP	0x00001000	/* Poisoned TLP */
-- 
cgit v1.2.3


From 5c2cacc1028917168b0f7650008dceaa6f7e3fe2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 24 Sep 2014 09:47:27 -0300
Subject: [media] v4l2-dv-timings: fix a sparse warning

This is detected with:
	gcc-4.8.3-7.fc20.x86_64
	sparse-0.5.0-3.fc20.x86_64

drivers/media/v4l2-core/v4l2-dv-timings.c:34:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:35:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:36:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:37:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:38:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:39:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:40:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:41:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:42:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:43:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:44:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:45:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:46:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:47:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:48:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:49:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:50:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:51:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:52:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:53:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:54:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:55:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:56:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:57:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:58:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:59:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:60:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:61:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:62:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:63:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:64:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:65:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:66:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:67:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:68:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:69:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:70:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:71:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:72:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:73:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:74:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:75:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:76:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:77:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:78:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:79:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:80:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:81:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:82:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:83:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:84:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:85:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:86:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:87:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:88:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:89:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:90:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:91:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:92:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:93:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:94:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:95:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:96:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:97:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:98:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:99:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:100:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:101:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:102:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:103:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:104:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:105:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:106:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:107:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:108:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:109:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:110:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:111:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:112:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:113:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:114:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:115:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:116:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:117:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:118:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:119:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:120:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:121:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:122:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:123:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:124:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:125:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:126:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:127:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:128:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:129:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:130:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:131:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:132:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:133:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:134:9: error: unknown field name in initializer
drivers/media/v4l2-core/v4l2-dv-timings.c:135:9: error: too many errors
drivers/media/usb/hdpvr/hdpvr-video.c:42:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:43:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:44:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:45:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:46:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:47:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:48:9: error: unknown field name in initializer
drivers/media/usb/hdpvr/hdpvr-video.c:49:9: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:484:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:485:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:486:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:487:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:488:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:489:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:490:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:491:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:492:18: error: unknown field name in initializer
drivers/media/platform/s5p-tv/hdmi_drv.c:493:18: error: unknown field name in initializer

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index 6c8f159e416e..6a0764c89fcb 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -21,17 +21,8 @@
 #ifndef _V4L2_DV_TIMINGS_H
 #define _V4L2_DV_TIMINGS_H
 
-#if __GNUC__ < 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ < 6))
-/* Sadly gcc versions older than 4.6 have a bug in how they initialize
-   anonymous unions where they require additional curly brackets.
-   This violates the C1x standard. This workaround adds the curly brackets
-   if needed. */
 #define V4L2_INIT_BT_TIMINGS(_width, args...) \
 	{ .bt = { _width , ## args } }
-#else
-#define V4L2_INIT_BT_TIMINGS(_width, args...) \
-	.bt = { _width , ## args }
-#endif
 
 /* CEA-861-E timings (i.e. standard HDTV timings) */
 
-- 
cgit v1.2.3


From 29075feaf1f55e6b1aa4054b44bc141e8d5eab0b Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 26 Sep 2014 09:05:39 -0700
Subject: next: openrisc: Fix build

openrisc:defconfig fails to build in next-20140926 with the following error.

In file included from arch/openrisc/kernel/signal.c:31:0:
./arch/openrisc/include/asm/syscall.h: In function 'syscall_get_arch':
./arch/openrisc/include/asm/syscall.h:77:9: error: 'EM_OPENRISC' undeclared

Fix by moving EM_OPENRISC to include/uapi/linux/elf-em.h.

Fixes: ce5d112827e5 ("ARCH: AUDIT: implement syscall_get_arch for all arches")
Cc: Eric Paris <eparis@redhat.com>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 arch/openrisc/include/uapi/asm/elf.h | 3 +--
 include/uapi/linux/elf-em.h          | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/openrisc/include/uapi/asm/elf.h b/arch/openrisc/include/uapi/asm/elf.h
index f02ea5830420..88842760e66f 100644
--- a/arch/openrisc/include/uapi/asm/elf.h
+++ b/arch/openrisc/include/uapi/asm/elf.h
@@ -55,9 +55,8 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 /* A placeholder; OR32 does not have fp support yes, so no fp regs for now.  */
 typedef unsigned long elf_fpregset_t;
 
-/* This should be moved to include/linux/elf.h */
+/* EM_OPENRISC is defined in linux/elf-em.h */
 #define EM_OR32         0x8472
-#define EM_OPENRISC     92     /* OpenRISC 32-bit embedded processor */
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index 01529bd96438..aa90bc98b6e2 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -32,6 +32,7 @@
 #define EM_V850		87	/* NEC v850 */
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
+#define EM_OPENRISC     92     /* OpenRISC 32-bit embedded processor */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
 #define EM_TI_C6000	140	/* TI C6X DSPs */
 #define EM_AARCH64	183	/* ARM 64 bit */
-- 
cgit v1.2.3


From 99c55f7d47c0dc6fc64729f37bf435abf43f4c60 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:57 -0700
Subject: bpf: introduce BPF syscall and maps

BPF syscall is a multiplexor for a range of different operations on eBPF.
This patch introduces syscall with single command to create a map.
Next patch adds commands to access maps.

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

Userspace example:
/* this syscall wrapper creates a map with given type and attributes
 * and returns map_fd on success.
 * use close(map_fd) to delete the map
 */
int bpf_create_map(enum bpf_map_type map_type, int key_size,
                   int value_size, int max_entries)
{
    union bpf_attr attr = {
        .map_type = map_type,
        .key_size = key_size,
        .value_size = value_size,
        .max_entries = max_entries
    };

    return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}

'union bpf_attr' is backwards compatible with future extensions.

More details in Documentation/networking/filter.txt and in manpage

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |  39 +++++++++
 include/linux/bpf.h                 |  41 +++++++++
 include/uapi/linux/bpf.h            |  23 +++++
 kernel/bpf/Makefile                 |   2 +-
 kernel/bpf/syscall.c                | 169 ++++++++++++++++++++++++++++++++++++
 5 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/bpf.h
 create mode 100644 kernel/bpf/syscall.c

(limited to 'include/uapi')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 014e0319a5c4..4a01d71785e9 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1001,6 +1001,45 @@ instruction that loads 64-bit immediate value into a dst_reg.
 Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
 32-bit immediate value into a register.
 
+eBPF maps
+---------
+'maps' is a generic storage of different types for sharing data between kernel
+and userspace.
+
+The maps are accessed from user space via BPF syscall, which has commands:
+- create a map with given type and attributes
+  map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
+  using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
+  returns process-local file descriptor or negative error
+
+- lookup key in a given map
+  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key, attr->value
+  returns zero and stores found elem into value or negative error
+
+- create or update key/value pair in a given map
+  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key, attr->value
+  returns zero or negative error
+
+- find and delete element by key in a given map
+  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+  using attr->map_fd, attr->key
+
+- to delete map: close(fd)
+  Exiting process will delete maps automatically
+
+userspace programs use this syscall to create/access maps that eBPF programs
+are concurrently updating.
+
+maps can have different types: hash, array, bloom filter, radix-tree, etc.
+
+The map is defined by:
+  . type
+  . max number of elements
+  . key size in bytes
+  . value size in bytes
+
 Testing
 -------
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index 000000000000..48014a71f0fe
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_H
+#define _LINUX_BPF_H 1
+
+#include <uapi/linux/bpf.h>
+#include <linux/workqueue.h>
+
+struct bpf_map;
+
+/* map is generic key/value storage optionally accesible by eBPF programs */
+struct bpf_map_ops {
+	/* funcs callable from userspace (via syscall) */
+	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
+	void (*map_free)(struct bpf_map *);
+};
+
+struct bpf_map {
+	atomic_t refcnt;
+	enum bpf_map_type map_type;
+	u32 key_size;
+	u32 value_size;
+	u32 max_entries;
+	struct bpf_map_ops *ops;
+	struct work_struct work;
+};
+
+struct bpf_map_type_list {
+	struct list_head list_node;
+	struct bpf_map_ops *ops;
+	enum bpf_map_type type;
+};
+
+void bpf_register_map_type(struct bpf_map_type_list *tl);
+void bpf_map_put(struct bpf_map *map);
+
+#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 479ed0b6be16..f58a10f9670c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -62,4 +62,27 @@ struct bpf_insn {
 	__s32	imm;		/* signed immediate constant */
 };
 
+/* BPF syscall commands */
+enum bpf_cmd {
+	/* create a map with given type and attributes
+	 * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+	 * returns fd or negative error
+	 * map is deleted when fd is closed
+	 */
+	BPF_MAP_CREATE,
+};
+
+enum bpf_map_type {
+	BPF_MAP_TYPE_UNSPEC,
+};
+
+union bpf_attr {
+	struct { /* anonymous struct used by BPF_MAP_CREATE command */
+		__u32	map_type;	/* one of enum bpf_map_type */
+		__u32	key_size;	/* size of key in bytes */
+		__u32	value_size;	/* size of value in bytes */
+		__u32	max_entries;	/* max number of entries in a map */
+	};
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 6a71145e2769..e9f7334ed07a 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1 +1 @@
-obj-y := core.o
+obj-y := core.o syscall.o
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
new file mode 100644
index 000000000000..428a0e23adc0
--- /dev/null
+++ b/kernel/bpf/syscall.c
@@ -0,0 +1,169 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/anon_inodes.h>
+
+static LIST_HEAD(bpf_map_types);
+
+static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
+{
+	struct bpf_map_type_list *tl;
+	struct bpf_map *map;
+
+	list_for_each_entry(tl, &bpf_map_types, list_node) {
+		if (tl->type == attr->map_type) {
+			map = tl->ops->map_alloc(attr);
+			if (IS_ERR(map))
+				return map;
+			map->ops = tl->ops;
+			map->map_type = attr->map_type;
+			return map;
+		}
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+/* boot time registration of different map implementations */
+void bpf_register_map_type(struct bpf_map_type_list *tl)
+{
+	list_add(&tl->list_node, &bpf_map_types);
+}
+
+/* called from workqueue */
+static void bpf_map_free_deferred(struct work_struct *work)
+{
+	struct bpf_map *map = container_of(work, struct bpf_map, work);
+
+	/* implementation dependent freeing */
+	map->ops->map_free(map);
+}
+
+/* decrement map refcnt and schedule it for freeing via workqueue
+ * (unrelying map implementation ops->map_free() might sleep)
+ */
+void bpf_map_put(struct bpf_map *map)
+{
+	if (atomic_dec_and_test(&map->refcnt)) {
+		INIT_WORK(&map->work, bpf_map_free_deferred);
+		schedule_work(&map->work);
+	}
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_map *map = filp->private_data;
+
+	bpf_map_put(map);
+	return 0;
+}
+
+static const struct file_operations bpf_map_fops = {
+	.release = bpf_map_release,
+};
+
+/* helper macro to check that unused fields 'union bpf_attr' are zero */
+#define CHECK_ATTR(CMD) \
+	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
+		   sizeof(attr->CMD##_LAST_FIELD), 0, \
+		   sizeof(*attr) - \
+		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
+		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
+
+#define BPF_MAP_CREATE_LAST_FIELD max_entries
+/* called via syscall */
+static int map_create(union bpf_attr *attr)
+{
+	struct bpf_map *map;
+	int err;
+
+	err = CHECK_ATTR(BPF_MAP_CREATE);
+	if (err)
+		return -EINVAL;
+
+	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
+	map = find_and_alloc_map(attr);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	atomic_set(&map->refcnt, 1);
+
+	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
+
+	if (err < 0)
+		/* failed to allocate fd */
+		goto free_map;
+
+	return err;
+
+free_map:
+	map->ops->map_free(map);
+	return err;
+}
+
+SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
+{
+	union bpf_attr attr = {};
+	int err;
+
+	/* the syscall is limited to root temporarily. This restriction will be
+	 * lifted when security audit is clean. Note that eBPF+tracing must have
+	 * this restriction, since it may pass kernel data to user space
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!access_ok(VERIFY_READ, uattr, 1))
+		return -EFAULT;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		return -E2BIG;
+
+	/* If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(attr);
+		end  = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			err = get_user(val, addr);
+			if (err)
+				return err;
+			if (val)
+				return -E2BIG;
+		}
+		size = sizeof(attr);
+	}
+
+	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
+	if (copy_from_user(&attr, uattr, size) != 0)
+		return -EFAULT;
+
+	switch (cmd) {
+	case BPF_MAP_CREATE:
+		err = map_create(&attr);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
-- 
cgit v1.2.3


From 749730ce42a2121e1c88350d69478bff3994b10a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:58 -0700
Subject: bpf: enable bpf syscall on x64 and i386

done as separate commit to ease conflict resolution

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/syscalls/syscall_32.tbl  | 1 +
 arch/x86/syscalls/syscall_64.tbl  | 1 +
 include/linux/syscalls.h          | 3 ++-
 include/uapi/asm-generic/unistd.h | 4 +++-
 kernel/sys_ni.c                   | 3 +++
 5 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 028b78168d85..9fe1b5d002f0 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -363,3 +363,4 @@
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
 356	i386	memfd_create		sys_memfd_create
+357	i386	bpf			sys_bpf
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 35dd922727b9..281150b539a2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -327,6 +327,7 @@
 318	common	getrandom		sys_getrandom
 319	common	memfd_create		sys_memfd_create
 320	common	kexec_file_load		sys_kexec_file_load
+321	common	bpf			sys_bpf
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0f86d85a9ce4..bda9b81357cc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -65,6 +65,7 @@ struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
 struct sigaltstack;
+union bpf_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -875,5 +876,5 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 			    const char __user *uargs);
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
-
+asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 11d11bc5c78f..22749c134117 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp)
 __SYSCALL(__NR_getrandom, sys_getrandom)
 #define __NR_memfd_create 279
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
 
 #undef __NR_syscalls
-#define __NR_syscalls 280
+#define __NR_syscalls 281
 
 /*
  * All syscalls below here should go away really,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 391d4ddb6f4b..b4b5083f5f5e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -218,3 +218,6 @@ cond_syscall(sys_kcmp);
 
 /* operate on Secure Computing state */
 cond_syscall(sys_seccomp);
+
+/* access BPF programs and maps */
+cond_syscall(sys_bpf);
-- 
cgit v1.2.3


From db20fd2b01087bdfbe30bce314a198eefedcc42e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:16:59 -0700
Subject: bpf: add lookup/update/delete/iterate methods to BPF maps

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   8 ++
 include/uapi/linux/bpf.h |  38 ++++++++
 kernel/bpf/syscall.c     | 235 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 281 insertions(+)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48014a71f0fe..2887f3f9da59 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
 
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
+#include <linux/file.h>
 
 struct bpf_map;
 
@@ -17,6 +18,12 @@ struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
 	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
 	void (*map_free)(struct bpf_map *);
+	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+	/* funcs callable from userspace and from eBPF programs */
+	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+	int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+	int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
 
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
+struct bpf_map *bpf_map_get(struct fd f);
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f58a10f9670c..395cabd2ca0a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -70,6 +70,35 @@ enum bpf_cmd {
 	 * map is deleted when fd is closed
 	 */
 	BPF_MAP_CREATE,
+
+	/* lookup key in a given map
+	 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero and stores found elem into value
+	 * or negative error
+	 */
+	BPF_MAP_LOOKUP_ELEM,
+
+	/* create or update key/value pair in a given map
+	 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->value
+	 * returns zero or negative error
+	 */
+	BPF_MAP_UPDATE_ELEM,
+
+	/* find and delete elem by key in a given map
+	 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key
+	 * returns zero or negative error
+	 */
+	BPF_MAP_DELETE_ELEM,
+
+	/* lookup key in a given map and return next key
+	 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+	 * Using attr->map_fd, attr->key, attr->next_key
+	 * returns zero and stores next key or negative error
+	 */
+	BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
 	};
+
+	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+	};
 } __attribute__((aligned(8)));
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 428a0e23adc0..f94349ecaf61 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@
 #include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
+#include <linux/file.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -111,6 +112,228 @@ free_map:
 	return err;
 }
 
+/* if error is returned, fd is released.
+ * On success caller should complete fd access with matching fdput()
+ */
+struct bpf_map *bpf_map_get(struct fd f)
+{
+	struct bpf_map *map;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &bpf_map_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map = f.file->private_data;
+
+	return map;
+}
+
+/* helper to convert user pointers passed inside __aligned_u64 fields */
+static void __user *u64_to_ptr(__u64 val)
+{
+	return (void __user *) (unsigned long) val;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+
+static int map_lookup_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *uvalue = u64_to_ptr(attr->value);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ESRCH;
+	rcu_read_lock();
+	value = map->ops->map_lookup_elem(map, key);
+	if (!value)
+		goto err_unlock;
+
+	err = -EFAULT;
+	if (copy_to_user(uvalue, value, map->value_size) != 0)
+		goto err_unlock;
+
+	err = 0;
+
+err_unlock:
+	rcu_read_unlock();
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
+
+static int map_update_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *uvalue = u64_to_ptr(attr->value);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *value;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	value = kmalloc(map->value_size, GFP_USER);
+	if (!value)
+		goto free_key;
+
+	err = -EFAULT;
+	if (copy_from_user(value, uvalue, map->value_size) != 0)
+		goto free_value;
+
+	/* eBPF program that use maps are running under rcu_read_lock(),
+	 * therefore all map accessors rely on this fact, so do the same here
+	 */
+	rcu_read_lock();
+	err = map->ops->map_update_elem(map, key, value);
+	rcu_read_unlock();
+
+free_value:
+	kfree(value);
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
+
+static int map_delete_elem(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	rcu_read_lock();
+	err = map->ops->map_delete_elem(map, key);
+	rcu_read_unlock();
+
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
+
+static int map_get_next_key(union bpf_attr *attr)
+{
+	void __user *ukey = u64_to_ptr(attr->key);
+	void __user *unext_key = u64_to_ptr(attr->next_key);
+	int ufd = attr->map_fd;
+	struct fd f = fdget(ufd);
+	struct bpf_map *map;
+	void *key, *next_key;
+	int err;
+
+	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
+		return -EINVAL;
+
+	map = bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = -ENOMEM;
+	key = kmalloc(map->key_size, GFP_USER);
+	if (!key)
+		goto err_put;
+
+	err = -EFAULT;
+	if (copy_from_user(key, ukey, map->key_size) != 0)
+		goto free_key;
+
+	err = -ENOMEM;
+	next_key = kmalloc(map->key_size, GFP_USER);
+	if (!next_key)
+		goto free_key;
+
+	rcu_read_lock();
+	err = map->ops->map_get_next_key(map, key, next_key);
+	rcu_read_unlock();
+	if (err)
+		goto free_next_key;
+
+	err = -EFAULT;
+	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
+		goto free_next_key;
+
+	err = 0;
+
+free_next_key:
+	kfree(next_key);
+free_key:
+	kfree(key);
+err_put:
+	fdput(f);
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_CREATE:
 		err = map_create(&attr);
 		break;
+	case BPF_MAP_LOOKUP_ELEM:
+		err = map_lookup_elem(&attr);
+		break;
+	case BPF_MAP_UPDATE_ELEM:
+		err = map_update_elem(&attr);
+		break;
+	case BPF_MAP_DELETE_ELEM:
+		err = map_delete_elem(&attr);
+		break;
+	case BPF_MAP_GET_NEXT_KEY:
+		err = map_get_next_key(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 09756af46893c18839062976c3252e93a1beeba7 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:00 -0700
Subject: bpf: expand BPF syscall with program load/unload

eBPF programs are similar to kernel modules. They are loaded by the user
process and automatically unloaded when process exits. Each eBPF program is
a safe run-to-completion set of instructions. eBPF verifier statically
determines that the program terminates and is safe to execute.

The following syscall wrapper can be used to load the program:
int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_cnt,
                  const char *license)
{
    union bpf_attr attr = {
        .prog_type = prog_type,
        .insns = ptr_to_u64(insns),
        .insn_cnt = insn_cnt,
        .license = ptr_to_u64(license),
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
where 'insns' is an array of eBPF instructions and 'license' is a string
that must be GPL compatible to call helper functions marked gpl_only

Upon succesful load the syscall returns prog_fd.
Use close(prog_fd) to unload the program.

User space tests and examples follow in the later patches

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  38 +++++++++++
 include/linux/filter.h   |   8 +--
 include/uapi/linux/bpf.h |  26 ++++++++
 kernel/bpf/core.c        |  29 +++++----
 kernel/bpf/syscall.c     | 165 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 246 insertions(+), 20 deletions(-)

(limited to 'include/uapi')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2887f3f9da59..92979182be81 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -46,4 +46,42 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
+ * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
+ * instructions after verifying
+ */
+struct bpf_func_proto {
+	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+	bool gpl_only;
+};
+
+struct bpf_verifier_ops {
+	/* return eBPF function prototype for verification */
+	const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
+};
+
+struct bpf_prog_type_list {
+	struct list_head list_node;
+	struct bpf_verifier_ops *ops;
+	enum bpf_prog_type type;
+};
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
+struct bpf_prog;
+
+struct bpf_prog_aux {
+	atomic_t refcnt;
+	bool is_gpl_compatible;
+	enum bpf_prog_type prog_type;
+	struct bpf_verifier_ops *ops;
+	struct bpf_map **used_maps;
+	u32 used_map_cnt;
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
+
+void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1a0bc6d134d7..4ffc0958d85e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,6 +21,7 @@
 struct sk_buff;
 struct sock;
 struct seccomp_data;
+struct bpf_prog_aux;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -300,17 +301,12 @@ struct bpf_binary_header {
 	u8 image[];
 };
 
-struct bpf_work_struct {
-	struct bpf_prog *prog;
-	struct work_struct work;
-};
-
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	bool			jited;		/* Is our filter JIT'ed? */
 	u32			len;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-	struct bpf_work_struct	*work;		/* Deferred free work struct */
+	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
 	/* Instructions for interpreter */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 395cabd2ca0a..424f442016e7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -99,12 +99,23 @@ enum bpf_cmd {
 	 * returns zero and stores next key or negative error
 	 */
 	BPF_MAP_GET_NEXT_KEY,
+
+	/* verify and load eBPF program
+	 * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+	 * Using attr->prog_type, attr->insns, attr->license
+	 * returns fd or negative error
+	 */
+	BPF_PROG_LOAD,
 };
 
 enum bpf_map_type {
 	BPF_MAP_TYPE_UNSPEC,
 };
 
+enum bpf_prog_type {
+	BPF_PROG_TYPE_UNSPEC,
+};
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
@@ -121,6 +132,21 @@ union bpf_attr {
 			__aligned_u64 next_key;
 		};
 	};
+
+	struct { /* anonymous struct used by BPF_PROG_LOAD command */
+		__u32		prog_type;	/* one of enum bpf_prog_type */
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+	};
 } __attribute__((aligned(8)));
 
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+	BPF_FUNC_unspec,
+	__BPF_FUNC_MAX_ID,
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8b7002488251..f0c30c59b317 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
 #include <linux/random.h>
 #include <linux/moduleloader.h>
 #include <asm/unaligned.h>
+#include <linux/bpf.h>
 
 /* Registers */
 #define BPF_R0	regs[BPF_REG_0]
@@ -71,7 +72,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
 			  gfp_extra_flags;
-	struct bpf_work_struct *ws;
+	struct bpf_prog_aux *aux;
 	struct bpf_prog *fp;
 
 	size = round_up(size, PAGE_SIZE);
@@ -79,14 +80,14 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 	if (fp == NULL)
 		return NULL;
 
-	ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
-	if (ws == NULL) {
+	aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
+	if (aux == NULL) {
 		vfree(fp);
 		return NULL;
 	}
 
 	fp->pages = size / PAGE_SIZE;
-	fp->work = ws;
+	fp->aux = aux;
 
 	return fp;
 }
@@ -110,10 +111,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
 		fp->pages = size / PAGE_SIZE;
 
-		/* We keep fp->work from fp_old around in the new
+		/* We keep fp->aux from fp_old around in the new
 		 * reallocated structure.
 		 */
-		fp_old->work = NULL;
+		fp_old->aux = NULL;
 		__bpf_prog_free(fp_old);
 	}
 
@@ -123,7 +124,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_realloc);
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-	kfree(fp->work);
+	kfree(fp->aux);
 	vfree(fp);
 }
 EXPORT_SYMBOL_GPL(__bpf_prog_free);
@@ -638,19 +639,19 @@ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
-	struct bpf_work_struct *ws;
+	struct bpf_prog_aux *aux;
 
-	ws = container_of(work, struct bpf_work_struct, work);
-	bpf_jit_free(ws->prog);
+	aux = container_of(work, struct bpf_prog_aux, work);
+	bpf_jit_free(aux->prog);
 }
 
 /* Free internal BPF program */
 void bpf_prog_free(struct bpf_prog *fp)
 {
-	struct bpf_work_struct *ws = fp->work;
+	struct bpf_prog_aux *aux = fp->aux;
 
-	INIT_WORK(&ws->work, bpf_prog_free_deferred);
-	ws->prog = fp;
-	schedule_work(&ws->work);
+	INIT_WORK(&aux->work, bpf_prog_free_deferred);
+	aux->prog = fp;
+	schedule_work(&aux->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f94349ecaf61..0afb4eaa1887 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -14,6 +14,8 @@
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/license.h>
+#include <linux/filter.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -334,6 +336,166 @@ err_put:
 	return err;
 }
 
+static LIST_HEAD(bpf_prog_types);
+
+static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
+{
+	struct bpf_prog_type_list *tl;
+
+	list_for_each_entry(tl, &bpf_prog_types, list_node) {
+		if (tl->type == type) {
+			prog->aux->ops = tl->ops;
+			prog->aux->prog_type = type;
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl)
+{
+	list_add(&tl->list_node, &bpf_prog_types);
+}
+
+/* drop refcnt on maps used by eBPF program and free auxilary data */
+static void free_used_maps(struct bpf_prog_aux *aux)
+{
+	int i;
+
+	for (i = 0; i < aux->used_map_cnt; i++)
+		bpf_map_put(aux->used_maps[i]);
+
+	kfree(aux->used_maps);
+}
+
+void bpf_prog_put(struct bpf_prog *prog)
+{
+	if (atomic_dec_and_test(&prog->aux->refcnt)) {
+		free_used_maps(prog->aux);
+		bpf_prog_free(prog);
+	}
+}
+
+static int bpf_prog_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_prog *prog = filp->private_data;
+
+	bpf_prog_put(prog);
+	return 0;
+}
+
+static const struct file_operations bpf_prog_fops = {
+        .release = bpf_prog_release,
+};
+
+static struct bpf_prog *get_prog(struct fd f)
+{
+	struct bpf_prog *prog;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	if (f.file->f_op != &bpf_prog_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	prog = f.file->private_data;
+
+	return prog;
+}
+
+/* called by sockets/tracing/seccomp before attaching program to an event
+ * pairs with bpf_prog_put()
+ */
+struct bpf_prog *bpf_prog_get(u32 ufd)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_prog *prog;
+
+	prog = get_prog(f);
+
+	if (IS_ERR(prog))
+		return prog;
+
+	atomic_inc(&prog->aux->refcnt);
+	fdput(f);
+	return prog;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define	BPF_PROG_LOAD_LAST_FIELD license
+
+static int bpf_prog_load(union bpf_attr *attr)
+{
+	enum bpf_prog_type type = attr->prog_type;
+	struct bpf_prog *prog;
+	int err;
+	char license[128];
+	bool is_gpl;
+
+	if (CHECK_ATTR(BPF_PROG_LOAD))
+		return -EINVAL;
+
+	/* copy eBPF program license from user space */
+	if (strncpy_from_user(license, u64_to_ptr(attr->license),
+			      sizeof(license) - 1) < 0)
+		return -EFAULT;
+	license[sizeof(license) - 1] = 0;
+
+	/* eBPF programs must be GPL compatible to use GPL-ed functions */
+	is_gpl = license_is_gpl_compatible(license);
+
+	if (attr->insn_cnt >= BPF_MAXINSNS)
+		return -EINVAL;
+
+	/* plain bpf_prog allocation */
+	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
+	if (!prog)
+		return -ENOMEM;
+
+	prog->len = attr->insn_cnt;
+
+	err = -EFAULT;
+	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
+			   prog->len * sizeof(struct bpf_insn)) != 0)
+		goto free_prog;
+
+	prog->orig_prog = NULL;
+	prog->jited = false;
+
+	atomic_set(&prog->aux->refcnt, 1);
+	prog->aux->is_gpl_compatible = is_gpl;
+
+	/* find program type: socket_filter vs tracing_filter */
+	err = find_prog_type(type, prog);
+	if (err < 0)
+		goto free_prog;
+
+	/* run eBPF verifier */
+	/* err = bpf_check(prog, tb); */
+
+	if (err < 0)
+		goto free_used_maps;
+
+	/* eBPF program is ready to be JITed */
+	bpf_prog_select_runtime(prog);
+
+	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
+
+	if (err < 0)
+		/* failed to allocate fd */
+		goto free_used_maps;
+
+	return err;
+
+free_used_maps:
+	free_used_maps(prog->aux);
+free_prog:
+	bpf_prog_free(prog);
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr = {};
@@ -395,6 +557,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_MAP_GET_NEXT_KEY:
 		err = map_get_next_key(&attr);
 		break;
+	case BPF_PROG_LOAD:
+		err = bpf_prog_load(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3


From cbd357008604925355ae7b54a09137dabb81b580 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 26 Sep 2014 00:17:03 -0700
Subject: bpf: verifier (add ability to receive verification log)

add optional attributes for BPF_PROG_LOAD syscall:
union bpf_attr {
    struct {
	...
	__u32         log_level; /* verbosity level of eBPF verifier */
	__u32         log_size;  /* size of user buffer */
	__aligned_u64 log_buf;   /* user supplied 'char *buffer' */
    };
};

when log_level > 0 the verifier will return its verification log in the user
supplied buffer 'log_buf' which can be used by program author to analyze why
verifier rejected given program.

'Understanding eBPF verifier messages' section of Documentation/networking/filter.txt
provides several examples of these messages, like the program:

  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
  BPF_LD_MAP_FD(BPF_REG_1, 0),
  BPF_CALL_FUNC(BPF_FUNC_map_lookup_elem),
  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
  BPF_EXIT_INSN(),

will be rejected with the following multi-line message in log_buf:

  0: (7a) *(u64 *)(r10 -8) = 0
  1: (bf) r2 = r10
  2: (07) r2 += -8
  3: (b7) r1 = 0
  4: (85) call 1
  5: (15) if r0 == 0x0 goto pc+1
   R0=map_ptr R10=fp
  6: (7a) *(u64 *)(r0 +4) = 0
  misaligned access off 4 size 8

The format of the output can change at any time as verifier evolves.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |   3 +
 kernel/bpf/syscall.c     |   2 +-
 kernel/bpf/verifier.c    | 235 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 424f442016e7..31b0ac208a52 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -138,6 +138,9 @@ union bpf_attr {
 		__u32		insn_cnt;
 		__aligned_u64	insns;
 		__aligned_u64	license;
+		__u32		log_level;	/* verbosity level of verifier */
+		__u32		log_size;	/* size of user buffer */
+		__aligned_u64	log_buf;	/* user supplied buffer */
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 74b3628c5fdb..ba61c8c16032 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -458,7 +458,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD license
+#define	BPF_PROG_LOAD_LAST_FIELD log_buf
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d6f9c3d6b4d7..871edc1f2e1f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -125,9 +125,244 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
+/* single container for all structs
+ * one verifier_env per bpf_check() call
+ */
+struct verifier_env {
+};
+
+/* verbose verifier prints what it's seeing
+ * bpf_check() is called under lock, so no race to access these global vars
+ */
+static u32 log_level, log_size, log_len;
+static char *log_buf;
+
+static DEFINE_MUTEX(bpf_verifier_lock);
+
+/* log_level controls verbosity level of eBPF verifier.
+ * verbose() is used to dump the verification trace to the log, so the user
+ * can figure out what's wrong with the program
+ */
+static void verbose(const char *fmt, ...)
+{
+	va_list args;
+
+	if (log_level == 0 || log_len >= log_size - 1)
+		return;
+
+	va_start(args, fmt);
+	log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
+	va_end(args);
+}
+
+static const char *const bpf_class_string[] = {
+	[BPF_LD]    = "ld",
+	[BPF_LDX]   = "ldx",
+	[BPF_ST]    = "st",
+	[BPF_STX]   = "stx",
+	[BPF_ALU]   = "alu",
+	[BPF_JMP]   = "jmp",
+	[BPF_RET]   = "BUG",
+	[BPF_ALU64] = "alu64",
+};
+
+static const char *const bpf_alu_string[] = {
+	[BPF_ADD >> 4]  = "+=",
+	[BPF_SUB >> 4]  = "-=",
+	[BPF_MUL >> 4]  = "*=",
+	[BPF_DIV >> 4]  = "/=",
+	[BPF_OR  >> 4]  = "|=",
+	[BPF_AND >> 4]  = "&=",
+	[BPF_LSH >> 4]  = "<<=",
+	[BPF_RSH >> 4]  = ">>=",
+	[BPF_NEG >> 4]  = "neg",
+	[BPF_MOD >> 4]  = "%=",
+	[BPF_XOR >> 4]  = "^=",
+	[BPF_MOV >> 4]  = "=",
+	[BPF_ARSH >> 4] = "s>>=",
+	[BPF_END >> 4]  = "endian",
+};
+
+static const char *const bpf_ldst_string[] = {
+	[BPF_W >> 3]  = "u32",
+	[BPF_H >> 3]  = "u16",
+	[BPF_B >> 3]  = "u8",
+	[BPF_DW >> 3] = "u64",
+};
+
+static const char *const bpf_jmp_string[] = {
+	[BPF_JA >> 4]   = "jmp",
+	[BPF_JEQ >> 4]  = "==",
+	[BPF_JGT >> 4]  = ">",
+	[BPF_JGE >> 4]  = ">=",
+	[BPF_JSET >> 4] = "&",
+	[BPF_JNE >> 4]  = "!=",
+	[BPF_JSGT >> 4] = "s>",
+	[BPF_JSGE >> 4] = "s>=",
+	[BPF_CALL >> 4] = "call",
+	[BPF_EXIT >> 4] = "exit",
+};
+
+static void print_bpf_insn(struct bpf_insn *insn)
+{
+	u8 class = BPF_CLASS(insn->code);
+
+	if (class == BPF_ALU || class == BPF_ALU64) {
+		if (BPF_SRC(insn->code) == BPF_X)
+			verbose("(%02x) %sr%d %s %sr%d\n",
+				insn->code, class == BPF_ALU ? "(u32) " : "",
+				insn->dst_reg,
+				bpf_alu_string[BPF_OP(insn->code) >> 4],
+				class == BPF_ALU ? "(u32) " : "",
+				insn->src_reg);
+		else
+			verbose("(%02x) %sr%d %s %s%d\n",
+				insn->code, class == BPF_ALU ? "(u32) " : "",
+				insn->dst_reg,
+				bpf_alu_string[BPF_OP(insn->code) >> 4],
+				class == BPF_ALU ? "(u32) " : "",
+				insn->imm);
+	} else if (class == BPF_STX) {
+		if (BPF_MODE(insn->code) == BPF_MEM)
+			verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->dst_reg,
+				insn->off, insn->src_reg);
+		else if (BPF_MODE(insn->code) == BPF_XADD)
+			verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->dst_reg, insn->off,
+				insn->src_reg);
+		else
+			verbose("BUG_%02x\n", insn->code);
+	} else if (class == BPF_ST) {
+		if (BPF_MODE(insn->code) != BPF_MEM) {
+			verbose("BUG_st_%02x\n", insn->code);
+			return;
+		}
+		verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
+			insn->code,
+			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+			insn->dst_reg,
+			insn->off, insn->imm);
+	} else if (class == BPF_LDX) {
+		if (BPF_MODE(insn->code) != BPF_MEM) {
+			verbose("BUG_ldx_%02x\n", insn->code);
+			return;
+		}
+		verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
+			insn->code, insn->dst_reg,
+			bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+			insn->src_reg, insn->off);
+	} else if (class == BPF_LD) {
+		if (BPF_MODE(insn->code) == BPF_ABS) {
+			verbose("(%02x) r0 = *(%s *)skb[%d]\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->imm);
+		} else if (BPF_MODE(insn->code) == BPF_IND) {
+			verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
+				insn->code,
+				bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+				insn->src_reg, insn->imm);
+		} else if (BPF_MODE(insn->code) == BPF_IMM) {
+			verbose("(%02x) r%d = 0x%x\n",
+				insn->code, insn->dst_reg, insn->imm);
+		} else {
+			verbose("BUG_ld_%02x\n", insn->code);
+			return;
+		}
+	} else if (class == BPF_JMP) {
+		u8 opcode = BPF_OP(insn->code);
+
+		if (opcode == BPF_CALL) {
+			verbose("(%02x) call %d\n", insn->code, insn->imm);
+		} else if (insn->code == (BPF_JMP | BPF_JA)) {
+			verbose("(%02x) goto pc%+d\n",
+				insn->code, insn->off);
+		} else if (insn->code == (BPF_JMP | BPF_EXIT)) {
+			verbose("(%02x) exit\n", insn->code);
+		} else if (BPF_SRC(insn->code) == BPF_X) {
+			verbose("(%02x) if r%d %s r%d goto pc%+d\n",
+				insn->code, insn->dst_reg,
+				bpf_jmp_string[BPF_OP(insn->code) >> 4],
+				insn->src_reg, insn->off);
+		} else {
+			verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
+				insn->code, insn->dst_reg,
+				bpf_jmp_string[BPF_OP(insn->code) >> 4],
+				insn->imm, insn->off);
+		}
+	} else {
+		verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
+	}
+}
+
 int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 {
+	char __user *log_ubuf = NULL;
+	struct verifier_env *env;
 	int ret = -EINVAL;
 
+	if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
+		return -E2BIG;
+
+	/* 'struct verifier_env' can be global, but since it's not small,
+	 * allocate/free it every time bpf_check() is called
+	 */
+	env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+	if (!env)
+		return -ENOMEM;
+
+	/* grab the mutex to protect few globals used by verifier */
+	mutex_lock(&bpf_verifier_lock);
+
+	if (attr->log_level || attr->log_buf || attr->log_size) {
+		/* user requested verbose verifier output
+		 * and supplied buffer to store the verification trace
+		 */
+		log_level = attr->log_level;
+		log_ubuf = (char __user *) (unsigned long) attr->log_buf;
+		log_size = attr->log_size;
+		log_len = 0;
+
+		ret = -EINVAL;
+		/* log_* values have to be sane */
+		if (log_size < 128 || log_size > UINT_MAX >> 8 ||
+		    log_level == 0 || log_ubuf == NULL)
+			goto free_env;
+
+		ret = -ENOMEM;
+		log_buf = vmalloc(log_size);
+		if (!log_buf)
+			goto free_env;
+	} else {
+		log_level = 0;
+	}
+
+	/* ret = do_check(env); */
+
+	if (log_level && log_len >= log_size - 1) {
+		BUG_ON(log_len >= log_size);
+		/* verifier log exceeded user supplied buffer */
+		ret = -ENOSPC;
+		/* fall through to return what was recorded */
+	}
+
+	/* copy verifier log back to user space including trailing zero */
+	if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
+		ret = -EFAULT;
+		goto free_log_buf;
+	}
+
+
+free_log_buf:
+	if (log_level)
+		vfree(log_buf);
+free_env:
+	kfree(env);
+	mutex_unlock(&bpf_verifier_lock);
 	return ret;
 }
-- 
cgit v1.2.3


From e3118e8359bb7c59555aca60c725106e6d78c5ce Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 26 Sep 2014 22:37:36 +0200
Subject: net: tcp: add DCTCP congestion control algorithm

This work adds the DataCenter TCP (DCTCP) congestion control
algorithm [1], which has been first published at SIGCOMM 2010 [2],
resp. follow-up analysis at SIGMETRICS 2011 [3] (and also, more
recently as an informational IETF draft available at [4]).

DCTCP is an enhancement to the TCP congestion control algorithm for
data center networks. Typical data center workloads are i.e.
i) partition/aggregate (queries; bursty, delay sensitive), ii) short
messages e.g. 50KB-1MB (for coordination and control state; delay
sensitive), and iii) large flows e.g. 1MB-100MB (data update;
throughput sensitive). DCTCP has therefore been designed for such
environments to provide/achieve the following three requirements:

  * High burst tolerance (incast due to partition/aggregate)
  * Low latency (short flows, queries)
  * High throughput (continuous data updates, large file
    transfers) with commodity, shallow buffered switches

The basic idea of its design consists of two fundamentals: i) on the
switch side, packets are being marked when its internal queue
length > threshold K (K is chosen so that a large enough headroom
for marked traffic is still available in the switch queue); ii) the
sender/host side maintains a moving average of the fraction of marked
packets, so each RTT, F is being updated as follows:

 F := X / Y, where X is # of marked ACKs, Y is total # of ACKs
 alpha := (1 - g) * alpha + g * F, where g is a smoothing constant

The resulting alpha (iow: probability that switch queue is congested)
is then being used in order to adaptively decrease the congestion
window W:

 W := (1 - (alpha / 2)) * W

The means for receiving marked packets resp. marking them on switch
side in DCTCP is the use of ECN.

RFC3168 describes a mechanism for using Explicit Congestion Notification
from the switch for early detection of congestion, rather than waiting
for segment loss to occur.

However, this method only detects the presence of congestion, not
the *extent*. In the presence of mild congestion, it reduces the TCP
congestion window too aggressively and unnecessarily affects the
throughput of long flows [4].

DCTCP, as mentioned, enhances Explicit Congestion Notification (ECN)
processing to estimate the fraction of bytes that encounter congestion,
rather than simply detecting that some congestion has occurred. DCTCP
then scales the TCP congestion window based on this estimate [4],
thus it can derive multibit feedback from the information present in
the single-bit sequence of marks in its control law. And thus act in
*proportion* to the extent of congestion, not its *presence*.

Switches therefore set the Congestion Experienced (CE) codepoint in
packets when internal queue lengths exceed threshold K. Resulting,
DCTCP delivers the same or better throughput than normal TCP, while
using 90% less buffer space.

It was found in [2] that DCTCP enables the applications to handle 10x
the current background traffic, without impacting foreground traffic.
Moreover, a 10x increase in foreground traffic did not cause any
timeouts, and thus largely eliminates TCP incast collapse problems.

The algorithm itself has already seen deployments in large production
data centers since then.

We did a long-term stress-test and analysis in a data center, short
summary of our TCP incast tests with iperf compared to cubic:

This test measured DCTCP throughput and latency and compared it with
CUBIC throughput and latency for an incast scenario. In this test, 19
senders sent at maximum rate to a single receiver. The receiver simply
ran iperf -s.

The senders ran iperf -c <receiver> -t 30. All senders started
simultaneously (using local clocks synchronized by ntp).

This test was repeated multiple times. Below shows the results from a
single test. Other tests are similar. (DCTCP results were extremely
consistent, CUBIC results show some variance induced by the TCP timeouts
that CUBIC encountered.)

For this test, we report statistics on the number of TCP timeouts,
flow throughput, and traffic latency.

1) Timeouts (total over all flows, and per flow summaries):

            CUBIC            DCTCP
  Total     3227             25
  Mean       169.842          1.316
  Median     183              1
  Max        207              5
  Min        123              0
  Stddev      28.991          1.600

Timeout data is taken by measuring the net change in netstat -s
"other TCP timeouts" reported. As a result, the timeout measurements
above are not restricted to the test traffic, and we believe that it
is likely that all of the "DCTCP timeouts" are actually timeouts for
non-test traffic. We report them nevertheless. CUBIC will also include
some non-test timeouts, but they are drawfed by bona fide test traffic
timeouts for CUBIC. Clearly DCTCP does an excellent job of preventing
TCP timeouts. DCTCP reduces timeouts by at least two orders of
magnitude and may well have eliminated them in this scenario.

2) Throughput (per flow in Mbps):

            CUBIC            DCTCP
  Mean      521.684          521.895
  Median    464              523
  Max       776              527
  Min       403              519
  Stddev    105.891            2.601
  Fairness    0.962            0.999

Throughput data was simply the average throughput for each flow
reported by iperf. By avoiding TCP timeouts, DCTCP is able to
achieve much better per-flow results. In CUBIC, many flows
experience TCP timeouts which makes flow throughput unpredictable and
unfair. DCTCP, on the other hand, provides very clean predictable
throughput without incurring TCP timeouts. Thus, the standard deviation
of CUBIC throughput is dramatically higher than the standard deviation
of DCTCP throughput.

Mean throughput is nearly identical because even though cubic flows
suffer TCP timeouts, other flows will step in and fill the unused
bandwidth. Note that this test is something of a best case scenario
for incast under CUBIC: it allows other flows to fill in for flows
experiencing a timeout. Under situations where the receiver is issuing
requests and then waiting for all flows to complete, flows cannot fill
in for timed out flows and throughput will drop dramatically.

3) Latency (in ms):

            CUBIC            DCTCP
  Mean      4.0088           0.04219
  Median    4.055            0.0395
  Max       4.2              0.085
  Min       3.32             0.028
  Stddev    0.1666           0.01064

Latency for each protocol was computed by running "ping -i 0.2
<receiver>" from a single sender to the receiver during the incast
test. For DCTCP, "ping -Q 0x6 -i 0.2 <receiver>" was used to ensure
that traffic traversed the DCTCP queue and was not dropped when the
queue size was greater than the marking threshold. The summary
statistics above are over all ping metrics measured between the single
sender, receiver pair.

The latency results for this test show a dramatic difference between
CUBIC and DCTCP. CUBIC intentionally overflows the switch buffer
which incurs the maximum queue latency (more buffer memory will lead
to high latency.) DCTCP, on the other hand, deliberately attempts to
keep queue occupancy low. The result is a two orders of magnitude
reduction of latency with DCTCP - even with a switch with relatively
little RAM. Switches with larger amounts of RAM will incur increasing
amounts of latency for CUBIC, but not for DCTCP.

4) Convergence and stability test:

This test measured the time that DCTCP took to fairly redistribute
bandwidth when a new flow commences. It also measured DCTCP's ability
to remain stable at a fair bandwidth distribution. DCTCP is compared
with CUBIC for this test.

At the commencement of this test, a single flow is sending at maximum
rate (near 10 Gbps) to a single receiver. One second after that first
flow commences, a new flow from a distinct server begins sending to
the same receiver as the first flow. After the second flow has sent
data for 10 seconds, the second flow is terminated. The first flow
sends for an additional second. Ideally, the bandwidth would be evenly
shared as soon as the second flow starts, and recover as soon as it
stops.

The results of this test are shown below. Note that the flow bandwidth
for the two flows was measured near the same time, but not
simultaneously.

DCTCP performs nearly perfectly within the measurement limitations
of this test: bandwidth is quickly distributed fairly between the two
flows, remains stable throughout the duration of the test, and
recovers quickly. CUBIC, in contrast, is slow to divide the bandwidth
fairly, and has trouble remaining stable.

  CUBIC                      DCTCP

  Seconds  Flow 1  Flow 2    Seconds  Flow 1  Flow 2
   0       9.93    0          0       9.92    0
   0.5     9.87    0          0.5     9.86    0
   1       8.73    2.25       1       6.46    4.88
   1.5     7.29    2.8        1.5     4.9     4.99
   2       6.96    3.1        2       4.92    4.94
   2.5     6.67    3.34       2.5     4.93    5
   3       6.39    3.57       3       4.92    4.99
   3.5     6.24    3.75       3.5     4.94    4.74
   4       6       3.94       4       5.34    4.71
   4.5     5.88    4.09       4.5     4.99    4.97
   5       5.27    4.98       5       4.83    5.01
   5.5     4.93    5.04       5.5     4.89    4.99
   6       4.9     4.99       6       4.92    5.04
   6.5     4.93    5.1        6.5     4.91    4.97
   7       4.28    5.8        7       4.97    4.97
   7.5     4.62    4.91       7.5     4.99    4.82
   8       5.05    4.45       8       5.16    4.76
   8.5     5.93    4.09       8.5     4.94    4.98
   9       5.73    4.2        9       4.92    5.02
   9.5     5.62    4.32       9.5     4.87    5.03
  10       6.12    3.2       10       4.91    5.01
  10.5     6.91    3.11      10.5     4.87    5.04
  11       8.48    0         11       8.49    4.94
  11.5     9.87    0         11.5     9.9     0

SYN/ACK ECT test:

This test demonstrates the importance of ECT on SYN and SYN-ACK packets
by measuring the connection probability in the presence of competing
flows for a DCTCP connection attempt *without* ECT in the SYN packet.
The test was repeated five times for each number of competing flows.

              Competing Flows  1 |    2 |    4 |    8 |   16
                               ------------------------------
Mean Connection Probability    1 | 0.67 | 0.45 | 0.28 |    0
Median Connection Probability  1 | 0.65 | 0.45 | 0.25 |    0

As the number of competing flows moves beyond 1, the connection
probability drops rapidly.

Enabling DCTCP with this patch requires the following steps:

DCTCP must be running both on the sender and receiver side in your
data center, i.e.:

  sysctl -w net.ipv4.tcp_congestion_control=dctcp

Also, ECN functionality must be enabled on all switches in your
data center for DCTCP to work. The default ECN marking threshold (K)
heuristic on the switch for DCTCP is e.g., 20 packets (30KB) at
1Gbps, and 65 packets (~100KB) at 10Gbps (K > 1/7 * C * RTT, [4]).

In above tests, for each switch port, traffic was segregated into two
queues. For any packet with a DSCP of 0x01 - or equivalently a TOS of
0x04 - the packet was placed into the DCTCP queue. All other packets
were placed into the default drop-tail queue. For the DCTCP queue,
RED/ECN marking was enabled, here, with a marking threshold of 75 KB.
More details however, we refer you to the paper [2] under section 3).

There are no code changes required to applications running in user
space. DCTCP has been implemented in full *isolation* of the rest of
the TCP code as its own congestion control module, so that it can run
without a need to expose code to the core of the TCP stack, and thus
nothing changes for non-DCTCP users.

Changes in the CA framework code are minimal, and DCTCP algorithm
operates on mechanisms that are already available in most Silicon.
The gain (dctcp_shift_g) is currently a fixed constant (1/16) from
the paper, but we leave the option that it can be chosen carefully
to a different value by the user.

In case DCTCP is being used and ECN support on peer site is off,
DCTCP falls back after 3WHS to operate in normal TCP Reno mode.

ss {-4,-6} -t -i diag interface:

  ... dctcp wscale:7,7 rto:203 rtt:2.349/0.026 mss:1448 cwnd:2054
  ssthresh:1102 ce_state 0 alpha 15 ab_ecn 0 ab_tot 735584
  send 10129.2Mbps pacing_rate 20254.1Mbps unacked:1822 retrans:0/15
  reordering:101 rcv_space:29200

  ... dctcp-reno wscale:7,7 rto:201 rtt:0.711/1.327 ato:40 mss:1448
  cwnd:10 ssthresh:1102 fallback_mode send 162.9Mbps pacing_rate
  325.5Mbps rcv_rtt:1.5 rcv_space:29200

More information about DCTCP can be found in [1-4].

  [1] http://simula.stanford.edu/~alizade/Site/DCTCP.html
  [2] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
  [3] http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
  [4] http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00

Joint work with Florian Westphal and Glenn Judd.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dctcp.txt |  43 +++++
 include/uapi/linux/inet_diag.h     |  13 +-
 net/ipv4/Kconfig                   |  26 ++-
 net/ipv4/Makefile                  |   1 +
 net/ipv4/tcp_dctcp.c               | 344 +++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_output.c              |   1 +
 6 files changed, 425 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/networking/dctcp.txt
 create mode 100644 net/ipv4/tcp_dctcp.c

(limited to 'include/uapi')

diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt
new file mode 100644
index 000000000000..0d5dfbc89ec9
--- /dev/null
+++ b/Documentation/networking/dctcp.txt
@@ -0,0 +1,43 @@
+DCTCP (DataCenter TCP)
+----------------------
+
+DCTCP is an enhancement to the TCP congestion control algorithm for data
+center networks and leverages Explicit Congestion Notification (ECN) in
+the data center network to provide multi-bit feedback to the end hosts.
+
+To enable it on end hosts:
+
+  sysctl -w net.ipv4.tcp_congestion_control=dctcp
+
+All switches in the data center network running DCTCP must support ECN
+marking and be configured for marking when reaching defined switch buffer
+thresholds. The default ECN marking threshold heuristic for DCTCP on
+switches is 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps,
+but might need further careful tweaking.
+
+For more details, see below documents:
+
+Paper:
+
+The algorithm is further described in detail in the following two
+SIGCOMM/SIGMETRICS papers:
+
+ i) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
+    Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
+      "Data Center TCP (DCTCP)", Data Center Networks session
+      Proc. ACM SIGCOMM, New Delhi, 2010.
+    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+    http://www.sigcomm.org/ccr/papers/2010/October/1851275.1851192
+
+ii) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
+      "Analysis of DCTCP: Stability, Convergence, and Fairness"
+      Proc. ACM SIGMETRICS, San Jose, 2011.
+    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
+
+IETF informational draft:
+
+  http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00
+
+DCTCP site:
+
+  http://simula.stanford.edu/~alizade/Site/DCTCP.html
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index bbde90fa5838..d65c0a09efd3 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -110,10 +110,10 @@ enum {
 	INET_DIAG_TCLASS,
 	INET_DIAG_SKMEMINFO,
 	INET_DIAG_SHUTDOWN,
+	INET_DIAG_DCTCPINFO,
 };
 
-#define INET_DIAG_MAX INET_DIAG_SHUTDOWN
-
+#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
 
 /* INET_DIAG_MEM */
 
@@ -133,5 +133,14 @@ struct tcpvegas_info {
 	__u32	tcpv_minrtt;
 };
 
+/* INET_DIAG_DCTCPINFO */
+
+struct tcp_dctcp_info {
+	__u16	dctcp_enabled;
+	__u16	dctcp_ce_state;
+	__u32	dctcp_alpha;
+	__u32	dctcp_ab_ecn;
+	__u32	dctcp_ab_tot;
+};
 
 #endif /* _UAPI_INET_DIAG_H_ */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 84f710b7472a..69fb37854449 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -570,6 +570,27 @@ config TCP_CONG_ILLINOIS
 	For further details see:
 	  http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
 
+config TCP_CONG_DCTCP
+	tristate "DataCenter TCP (DCTCP)"
+	default n
+	---help---
+	DCTCP leverages Explicit Congestion Notification (ECN) in the network to
+	provide multi-bit feedback to the end hosts. It is designed to provide:
+
+	- High burst tolerance (incast due to partition/aggregate),
+	- Low latency (short flows, queries),
+	- High throughput (continuous data updates, large file transfers) with
+	  commodity, shallow-buffered switches.
+
+	All switches in the data center network running DCTCP must support
+	ECN marking and be configured for marking when reaching defined switch
+	buffer thresholds. The default ECN marking threshold heuristic for
+	DCTCP on switches is 20 packets (30KB) at 1Gbps, and 65 packets
+	(~100KB) at 10Gbps, but might need further careful tweaking.
+
+	For further details see:
+	  http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
@@ -598,9 +619,11 @@ choice
 	config DEFAULT_WESTWOOD
 		bool "Westwood" if TCP_CONG_WESTWOOD=y
 
+	config DEFAULT_DCTCP
+		bool "DCTCP" if TCP_CONG_DCTCP=y
+
 	config DEFAULT_RENO
 		bool "Reno"
-
 endchoice
 
 endif
@@ -620,6 +643,7 @@ config DEFAULT_TCP_CONG
 	default "westwood" if DEFAULT_WESTWOOD
 	default "veno" if DEFAULT_VENO
 	default "reno" if DEFAULT_RENO
+	default "dctcp" if DEFAULT_DCTCP
 	default "cubic"
 
 config TCP_MD5SIG
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index d78d404c596f..d8105787c199 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
+obj-$(CONFIG_TCP_CONG_DCTCP) += tcp_dctcp.o
 obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o
 obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
 obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
new file mode 100644
index 000000000000..b504371af742
--- /dev/null
+++ b/net/ipv4/tcp_dctcp.c
@@ -0,0 +1,344 @@
+/* DataCenter TCP (DCTCP) congestion control.
+ *
+ * http://simula.stanford.edu/~alizade/Site/DCTCP.html
+ *
+ * This is an implementation of DCTCP over Reno, an enhancement to the
+ * TCP congestion control algorithm designed for data centers. DCTCP
+ * leverages Explicit Congestion Notification (ECN) in the network to
+ * provide multi-bit feedback to the end hosts. DCTCP's goal is to meet
+ * the following three data center transport requirements:
+ *
+ *  - High burst tolerance (incast due to partition/aggregate)
+ *  - Low latency (short flows, queries)
+ *  - High throughput (continuous data updates, large file transfers)
+ *    with commodity shallow buffered switches
+ *
+ * The algorithm is described in detail in the following two papers:
+ *
+ * 1) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
+ *    Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
+ *      "Data Center TCP (DCTCP)", Data Center Networks session
+ *      Proc. ACM SIGCOMM, New Delhi, 2010.
+ *   http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+ *
+ * 2) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
+ *      "Analysis of DCTCP: Stability, Convergence, and Fairness"
+ *      Proc. ACM SIGMETRICS, San Jose, 2011.
+ *   http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
+ *
+ * Initial prototype from Abdul Kabbani, Masato Yasuda and Mohammad Alizadeh.
+ *
+ * Authors:
+ *
+ *	Daniel Borkmann <dborkman@redhat.com>
+ *	Florian Westphal <fw@strlen.de>
+ *	Glenn Judd <glenn.judd@morganstanley.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+
+#define DCTCP_MAX_ALPHA	1024U
+
+struct dctcp {
+	u32 acked_bytes_ecn;
+	u32 acked_bytes_total;
+	u32 prior_snd_una;
+	u32 prior_rcv_nxt;
+	u32 dctcp_alpha;
+	u32 next_seq;
+	u32 ce_state;
+	u32 delayed_ack_reserved;
+};
+
+static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
+module_param(dctcp_shift_g, uint, 0644);
+MODULE_PARM_DESC(dctcp_shift_g, "parameter g for updating dctcp_alpha");
+
+static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
+module_param(dctcp_alpha_on_init, uint, 0644);
+MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
+
+static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
+module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
+MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
+		 "parameter for clamping alpha on loss");
+
+static struct tcp_congestion_ops dctcp_reno;
+
+static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
+{
+	ca->next_seq = tp->snd_nxt;
+
+	ca->acked_bytes_ecn = 0;
+	ca->acked_bytes_total = 0;
+}
+
+static void dctcp_init(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	if ((tp->ecn_flags & TCP_ECN_OK) ||
+	    (sk->sk_state == TCP_LISTEN ||
+	     sk->sk_state == TCP_CLOSE)) {
+		struct dctcp *ca = inet_csk_ca(sk);
+
+		ca->prior_snd_una = tp->snd_una;
+		ca->prior_rcv_nxt = tp->rcv_nxt;
+
+		ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
+
+		ca->delayed_ack_reserved = 0;
+		ca->ce_state = 0;
+
+		dctcp_reset(tp, ca);
+		return;
+	}
+
+	/* No ECN support? Fall back to Reno. Also need to clear
+	 * ECT from sk since it is set during 3WHS for DCTCP.
+	 */
+	inet_csk(sk)->icsk_ca_ops = &dctcp_reno;
+	INET_ECN_dontxmit(sk);
+}
+
+static u32 dctcp_ssthresh(struct sock *sk)
+{
+	const struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+}
+
+/* Minimal DCTP CE state machine:
+ *
+ * S:	0 <- last pkt was non-CE
+ *	1 <- last pkt was CE
+ */
+
+static void dctcp_ce_state_0_to_1(struct sock *sk)
+{
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* State has changed from CE=0 to CE=1 and delayed
+	 * ACK has not sent yet.
+	 */
+	if (!ca->ce_state && ca->delayed_ack_reserved) {
+		u32 tmp_rcv_nxt;
+
+		/* Save current rcv_nxt. */
+		tmp_rcv_nxt = tp->rcv_nxt;
+
+		/* Generate previous ack with CE=0. */
+		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+		tp->rcv_nxt = ca->prior_rcv_nxt;
+
+		tcp_send_ack(sk);
+
+		/* Recover current rcv_nxt. */
+		tp->rcv_nxt = tmp_rcv_nxt;
+	}
+
+	ca->prior_rcv_nxt = tp->rcv_nxt;
+	ca->ce_state = 1;
+
+	tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+}
+
+static void dctcp_ce_state_1_to_0(struct sock *sk)
+{
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* State has changed from CE=1 to CE=0 and delayed
+	 * ACK has not sent yet.
+	 */
+	if (ca->ce_state && ca->delayed_ack_reserved) {
+		u32 tmp_rcv_nxt;
+
+		/* Save current rcv_nxt. */
+		tmp_rcv_nxt = tp->rcv_nxt;
+
+		/* Generate previous ack with CE=1. */
+		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+		tp->rcv_nxt = ca->prior_rcv_nxt;
+
+		tcp_send_ack(sk);
+
+		/* Recover current rcv_nxt. */
+		tp->rcv_nxt = tmp_rcv_nxt;
+	}
+
+	ca->prior_rcv_nxt = tp->rcv_nxt;
+	ca->ce_state = 0;
+
+	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+static void dctcp_update_alpha(struct sock *sk, u32 flags)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct dctcp *ca = inet_csk_ca(sk);
+	u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
+
+	/* If ack did not advance snd_una, count dupack as MSS size.
+	 * If ack did update window, do not count it at all.
+	 */
+	if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
+		acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
+	if (acked_bytes) {
+		ca->acked_bytes_total += acked_bytes;
+		ca->prior_snd_una = tp->snd_una;
+
+		if (flags & CA_ACK_ECE)
+			ca->acked_bytes_ecn += acked_bytes;
+	}
+
+	/* Expired RTT */
+	if (!before(tp->snd_una, ca->next_seq)) {
+		/* For avoiding denominator == 1. */
+		if (ca->acked_bytes_total == 0)
+			ca->acked_bytes_total = 1;
+
+		/* alpha = (1 - g) * alpha + g * F */
+		ca->dctcp_alpha = ca->dctcp_alpha -
+				  (ca->dctcp_alpha >> dctcp_shift_g) +
+				  (ca->acked_bytes_ecn << (10U - dctcp_shift_g)) /
+				  ca->acked_bytes_total;
+
+		if (ca->dctcp_alpha > DCTCP_MAX_ALPHA)
+			/* Clamp dctcp_alpha to max. */
+			ca->dctcp_alpha = DCTCP_MAX_ALPHA;
+
+		dctcp_reset(tp, ca);
+	}
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+	if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
+		struct dctcp *ca = inet_csk_ca(sk);
+
+		/* If this extension is enabled, we clamp dctcp_alpha to
+		 * max on packet loss; the motivation is that dctcp_alpha
+		 * is an indicator to the extend of congestion and packet
+		 * loss is an indicator of extreme congestion; setting
+		 * this in practice turned out to be beneficial, and
+		 * effectively assumes total congestion which reduces the
+		 * window by half.
+		 */
+		ca->dctcp_alpha = DCTCP_MAX_ALPHA;
+	}
+}
+
+static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
+{
+	struct dctcp *ca = inet_csk_ca(sk);
+
+	switch (ev) {
+	case CA_EVENT_DELAYED_ACK:
+		if (!ca->delayed_ack_reserved)
+			ca->delayed_ack_reserved = 1;
+		break;
+	case CA_EVENT_NON_DELAYED_ACK:
+		if (ca->delayed_ack_reserved)
+			ca->delayed_ack_reserved = 0;
+		break;
+	default:
+		/* Don't care for the rest. */
+		break;
+	}
+}
+
+static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+	switch (ev) {
+	case CA_EVENT_ECN_IS_CE:
+		dctcp_ce_state_0_to_1(sk);
+		break;
+	case CA_EVENT_ECN_NO_CE:
+		dctcp_ce_state_1_to_0(sk);
+		break;
+	case CA_EVENT_DELAYED_ACK:
+	case CA_EVENT_NON_DELAYED_ACK:
+		dctcp_update_ack_reserved(sk, ev);
+		break;
+	default:
+		/* Don't care for the rest. */
+		break;
+	}
+}
+
+static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+{
+	const struct dctcp *ca = inet_csk_ca(sk);
+
+	/* Fill it also in case of VEGASINFO due to req struct limits.
+	 * We can still correctly retrieve it later.
+	 */
+	if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
+	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcp_dctcp_info info;
+
+		memset(&info, 0, sizeof(info));
+		if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
+			info.dctcp_enabled = 1;
+			info.dctcp_ce_state = (u16) ca->ce_state;
+			info.dctcp_alpha = ca->dctcp_alpha;
+			info.dctcp_ab_ecn = ca->acked_bytes_ecn;
+			info.dctcp_ab_tot = ca->acked_bytes_total;
+		}
+
+		nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+	}
+}
+
+static struct tcp_congestion_ops dctcp __read_mostly = {
+	.init		= dctcp_init,
+	.in_ack_event   = dctcp_update_alpha,
+	.cwnd_event	= dctcp_cwnd_event,
+	.ssthresh	= dctcp_ssthresh,
+	.cong_avoid	= tcp_reno_cong_avoid,
+	.set_state	= dctcp_state,
+	.get_info	= dctcp_get_info,
+	.flags		= TCP_CONG_NEEDS_ECN,
+	.owner		= THIS_MODULE,
+	.name		= "dctcp",
+};
+
+static struct tcp_congestion_ops dctcp_reno __read_mostly = {
+	.ssthresh	= tcp_reno_ssthresh,
+	.cong_avoid	= tcp_reno_cong_avoid,
+	.get_info	= dctcp_get_info,
+	.owner		= THIS_MODULE,
+	.name		= "dctcp-reno",
+};
+
+static int __init dctcp_register(void)
+{
+	BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&dctcp);
+}
+
+static void __exit dctcp_unregister(void)
+{
+	tcp_unregister_congestion_control(&dctcp);
+}
+
+module_init(dctcp_register);
+module_exit(dctcp_unregister);
+
+MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_AUTHOR("Glenn Judd <glenn.judd@morganstanley.com>");
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DataCenter TCP (DCTCP)");
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 124f9e4e4594..86a0216fcaa1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3211,6 +3211,7 @@ void tcp_send_ack(struct sock *sk)
 	skb_mstamp_get(&buff->skb_mstamp);
 	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
 }
+EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.
-- 
cgit v1.2.3


From f5c9ecebaf2a2c9381973798e389cc019dd983e0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 29 Sep 2014 10:06:19 -0600
Subject: vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type

VFIO allows devices to be safely handed off to userspace by putting
them behind an IOMMU configured to ensure DMA and interrupt isolation.
This enables userspace KVM clients, such as kvmtool and qemu, to further
map the device into a virtual machine.

With IOMMUs such as the ARM SMMU, it is then possible to provide SMMU
translation services to the guest operating system, which are nested
with the existing translation installed by VFIO. However, enabling this
feature means that the IOMMU driver must be informed that the VFIO domain
is being created for the purposes of nested translation.

This patch adds a new IOMMU type (VFIO_TYPE1_NESTING_IOMMU) to the VFIO
type-1 driver. The new IOMMU type acts identically to the
VFIO_TYPE1v2_IOMMU type, but additionally sets the DOMAIN_ATTR_NESTING
attribute on its IOMMU domains.

Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++++++++-----
 include/uapi/linux/vfio.h       |  3 +++
 2 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0734fbe5b651..583ccdb2c58f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -57,7 +57,8 @@ struct vfio_iommu {
 	struct list_head	domain_list;
 	struct mutex		lock;
 	struct rb_root		dma_list;
-	bool v2;
+	bool			v2;
+	bool			nesting;
 };
 
 struct vfio_domain {
@@ -705,6 +706,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_free;
 	}
 
+	if (iommu->nesting) {
+		int attr = 1;
+
+		ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
+					    &attr);
+		if (ret)
+			goto out_domain;
+	}
+
 	ret = iommu_attach_group(domain->domain, iommu_group);
 	if (ret)
 		goto out_domain;
@@ -819,17 +829,26 @@ static void *vfio_iommu_type1_open(unsigned long arg)
 {
 	struct vfio_iommu *iommu;
 
-	if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
-		return ERR_PTR(-EINVAL);
-
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
 		return ERR_PTR(-ENOMEM);
 
+	switch (arg) {
+	case VFIO_TYPE1_IOMMU:
+		break;
+	case VFIO_TYPE1_NESTING_IOMMU:
+		iommu->nesting = true;
+	case VFIO_TYPE1v2_IOMMU:
+		iommu->v2 = true;
+		break;
+	default:
+		kfree(iommu);
+		return ERR_PTR(-EINVAL);
+	}
+
 	INIT_LIST_HEAD(&iommu->domain_list);
 	iommu->dma_list = RB_ROOT;
 	mutex_init(&iommu->lock);
-	iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
 
 	return iommu;
 }
@@ -885,6 +904,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 		switch (arg) {
 		case VFIO_TYPE1_IOMMU:
 		case VFIO_TYPE1v2_IOMMU:
+		case VFIO_TYPE1_NESTING_IOMMU:
 			return 1;
 		case VFIO_DMA_CC_IOMMU:
 			if (!iommu)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974c64bf..29715d27548f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -33,6 +33,9 @@
 /* Check if EEH is supported */
 #define VFIO_EEH			5
 
+/* Two-stage IOMMU */
+#define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
-- 
cgit v1.2.3


From 79cf79abce71eb7dbc40e2f3121048ca5405cb47 Mon Sep 17 00:00:00 2001
From: Michael Braun <michael-dev@fami-braun.de>
Date: Thu, 25 Sep 2014 16:31:08 +0200
Subject: macvlan: add source mode

This patch adds a new mode of operation to macvlan, called "source".
It allows one to set a list of allowed mac address, which is used
to match against source mac address from received frames on underlying
interface.
This enables creating mac based VLAN associations, instead of standard
port or tag based. The feature is useful to deploy 802.1x mac based
behavior, where drivers of underlying interfaces doesn't allows that.

Configuration is done through the netlink interface using e.g.:
 ip link add link eth0 name macvlan0 type macvlan mode source
 ip link add link eth0 name macvlan1 type macvlan mode source
 ip link set link dev macvlan0 type macvlan macaddr add 00:11:11:11:11:11
 ip link set link dev macvlan0 type macvlan macaddr add 00:22:22:22:22:22
 ip link set link dev macvlan0 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:33:33:33:33:33
 ip link set link dev macvlan1 type macvlan macaddr add 00:44:44:44:44:44

This allows clients with MAC addresses 00:11:11:11:11:11,
00:22:22:22:22:22 to be part of only VLAN associated with macvlan0
interface. Clients with MAC addresses 00:44:44:44:44:44 with only VLAN
associated with macvlan1 interface. And client with MAC address
00:33:33:33:33:33 to be associated with both VLANs.

Based on work of Stefan Gula <steweg@gmail.com>

v8: last version of Stefan Gula for Kernel 3.2.1
v9: rework onto linux-next 2014-03-12 by Michael Braun
    add MACADDR_SET command, enable to configure mac for source mode
    while creating interface
v10:
  - reduce indention level
  - rename source_list to source_entry
  - use aligned 64bit ether address
  - use hash_64 instead of addr[5]
v11:
  - rebase for 3.14 / linux-next 20.04.2014
v12
  - rebase for linux-next 2014-09-25

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c        | 304 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h   |   1 +
 include/uapi/linux/if_link.h |  12 ++
 3 files changed, 314 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 726edabff26b..e8a453f1b458 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -35,7 +35,8 @@
 #include <net/xfrm.h>
 #include <linux/netpoll.h>
 
-#define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
+#define MACVLAN_HASH_BITS	8
+#define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
 #define MACVLAN_BC_QUEUE_LEN	1000
 
 struct macvlan_port {
@@ -47,6 +48,14 @@ struct macvlan_port {
 	struct work_struct	bc_work;
 	bool 			passthru;
 	int			count;
+	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
+};
+
+struct macvlan_source_entry {
+	struct hlist_node	hlist;
+	struct macvlan_dev	*vlan;
+	unsigned char		addr[6+2] __aligned(sizeof(u16));
+	struct rcu_head		rcu;
 };
 
 struct macvlan_skb_cb {
@@ -57,6 +66,20 @@ struct macvlan_skb_cb {
 
 static void macvlan_port_destroy(struct net_device *dev);
 
+/* Hash Ethernet address */
+static u32 macvlan_eth_hash(const unsigned char *addr)
+{
+	u64 value = get_unaligned((u64 *)addr);
+
+	/* only want 6 bytes */
+#ifdef __BIG_ENDIAN
+	value >>= 16;
+#else
+	value <<= 16;
+#endif
+	return hash_64(value, MACVLAN_HASH_BITS);
+}
+
 static struct macvlan_port *macvlan_port_get_rcu(const struct net_device *dev)
 {
 	return rcu_dereference(dev->rx_handler_data);
@@ -73,20 +96,68 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
 	struct macvlan_dev *vlan;
+	u32 idx = macvlan_eth_hash(addr);
 
-	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) {
+	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[idx], hlist) {
 		if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr))
 			return vlan;
 	}
 	return NULL;
 }
 
+static struct macvlan_source_entry *macvlan_hash_lookup_source(
+	const struct macvlan_dev *vlan,
+	const unsigned char *addr)
+{
+	struct macvlan_source_entry *entry;
+	u32 idx = macvlan_eth_hash(addr);
+	struct hlist_head *h = &vlan->port->vlan_source_hash[idx];
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (ether_addr_equal_64bits(entry->addr, addr) &&
+		    entry->vlan == vlan)
+			return entry;
+	}
+	return NULL;
+}
+
+static int macvlan_hash_add_source(struct macvlan_dev *vlan,
+				   const unsigned char *addr)
+{
+	struct macvlan_port *port = vlan->port;
+	struct macvlan_source_entry *entry;
+	struct hlist_head *h;
+
+	entry = macvlan_hash_lookup_source(vlan, addr);
+	if (entry)
+		return 0;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	ether_addr_copy(entry->addr, addr);
+	entry->vlan = vlan;
+	h = &port->vlan_source_hash[macvlan_eth_hash(addr)];
+	hlist_add_head_rcu(&entry->hlist, h);
+	vlan->macaddr_count++;
+
+	return 0;
+}
+
 static void macvlan_hash_add(struct macvlan_dev *vlan)
 {
 	struct macvlan_port *port = vlan->port;
 	const unsigned char *addr = vlan->dev->dev_addr;
+	u32 idx = macvlan_eth_hash(addr);
 
-	hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[addr[5]]);
+	hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[idx]);
+}
+
+static void macvlan_hash_del_source(struct macvlan_source_entry *entry)
+{
+	hlist_del_rcu(&entry->hlist);
+	kfree_rcu(entry, rcu);
 }
 
 static void macvlan_hash_del(struct macvlan_dev *vlan, bool sync)
@@ -267,6 +338,65 @@ err:
 	atomic_long_inc(&skb->dev->rx_dropped);
 }
 
+static void macvlan_flush_sources(struct macvlan_port *port,
+				  struct macvlan_dev *vlan)
+{
+	int i;
+
+	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
+		struct hlist_node *h, *n;
+
+		hlist_for_each_safe(h, n, &port->vlan_source_hash[i]) {
+			struct macvlan_source_entry *entry;
+
+			entry = hlist_entry(h, struct macvlan_source_entry,
+					    hlist);
+			if (entry->vlan == vlan)
+				macvlan_hash_del_source(entry);
+		}
+	}
+	vlan->macaddr_count = 0;
+}
+
+static void macvlan_forward_source_one(struct sk_buff *skb,
+				       struct macvlan_dev *vlan)
+{
+	struct sk_buff *nskb;
+	struct net_device *dev;
+	int len;
+	int ret;
+
+	dev = vlan->dev;
+	if (unlikely(!(dev->flags & IFF_UP)))
+		return;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	len = nskb->len + ETH_HLEN;
+	nskb->dev = dev;
+	nskb->pkt_type = PACKET_HOST;
+
+	ret = netif_rx(nskb);
+	macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, 0);
+}
+
+static void macvlan_forward_source(struct sk_buff *skb,
+				   struct macvlan_port *port,
+				   const unsigned char *addr)
+{
+	struct macvlan_source_entry *entry;
+	u32 idx = macvlan_eth_hash(addr);
+	struct hlist_head *h = &port->vlan_source_hash[idx];
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (ether_addr_equal_64bits(entry->addr, addr))
+			if (entry->vlan->dev->flags & IFF_UP)
+				macvlan_forward_source_one(skb, entry->vlan);
+	}
+}
+
 /* called under rcu_read_lock() from netif_receive_skb */
 static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 {
@@ -285,6 +415,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		if (!skb)
 			return RX_HANDLER_CONSUMED;
 		eth = eth_hdr(skb);
+		macvlan_forward_source(skb, port, eth->h_source);
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (src && src->mode != MACVLAN_MODE_VEPA &&
 		    src->mode != MACVLAN_MODE_BRIDGE) {
@@ -301,6 +432,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		return RX_HANDLER_PASS;
 	}
 
+	macvlan_forward_source(skb, port, eth->h_source);
 	if (port->passthru)
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
@@ -667,6 +799,7 @@ static void macvlan_uninit(struct net_device *dev)
 
 	free_percpu(vlan->pcpu_stats);
 
+	macvlan_flush_sources(port, vlan);
 	port->count -= 1;
 	if (!port->count)
 		macvlan_port_destroy(port->dev);
@@ -925,6 +1058,8 @@ static int macvlan_port_create(struct net_device *dev)
 	INIT_LIST_HEAD(&port->vlans);
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
+	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
+		INIT_HLIST_HEAD(&port->vlan_source_hash[i]);
 
 	skb_queue_head_init(&port->bc_queue);
 	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
@@ -966,11 +1101,102 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 		case MACVLAN_MODE_VEPA:
 		case MACVLAN_MODE_BRIDGE:
 		case MACVLAN_MODE_PASSTHRU:
+		case MACVLAN_MODE_SOURCE:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) {
+		case MACVLAN_MACADDR_ADD:
+		case MACVLAN_MACADDR_DEL:
+		case MACVLAN_MACADDR_FLUSH:
+		case MACVLAN_MACADDR_SET:
 			break;
 		default:
 			return -EINVAL;
 		}
 	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR]) {
+		if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN)
+			return -EINVAL;
+
+		if (!is_valid_ether_addr(nla_data(data[IFLA_MACVLAN_MACADDR])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (data && data[IFLA_MACVLAN_MACADDR_COUNT])
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * reconfigure list of remote source mac address
+ * (only for macvlan devices in source mode)
+ * Note regarding alignment: all netlink data is aligned to 4 Byte, which
+ * suffices for both ether_addr_copy and ether_addr_equal_64bits usage.
+ */
+static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode,
+				      struct nlattr *data[])
+{
+	char *addr = NULL;
+	int ret, rem, len;
+	struct nlattr *nla, *head;
+	struct macvlan_source_entry *entry;
+
+	if (data[IFLA_MACVLAN_MACADDR])
+		addr = nla_data(data[IFLA_MACVLAN_MACADDR]);
+
+	if (mode == MACVLAN_MACADDR_ADD) {
+		if (!addr)
+			return -EINVAL;
+
+		return macvlan_hash_add_source(vlan, addr);
+
+	} else if (mode == MACVLAN_MACADDR_DEL) {
+		if (!addr)
+			return -EINVAL;
+
+		entry = macvlan_hash_lookup_source(vlan, addr);
+		if (entry) {
+			macvlan_hash_del_source(entry);
+			vlan->macaddr_count--;
+		}
+	} else if (mode == MACVLAN_MACADDR_FLUSH) {
+		macvlan_flush_sources(vlan->port, vlan);
+	} else if (mode == MACVLAN_MACADDR_SET) {
+		macvlan_flush_sources(vlan->port, vlan);
+
+		if (addr) {
+			ret = macvlan_hash_add_source(vlan, addr);
+			if (ret)
+				return ret;
+		}
+
+		if (!data || !data[IFLA_MACVLAN_MACADDR_DATA])
+			return 0;
+
+		head = nla_data(data[IFLA_MACVLAN_MACADDR_DATA]);
+		len = nla_len(data[IFLA_MACVLAN_MACADDR_DATA]);
+
+		nla_for_each_attr(nla, head, len, rem) {
+			if (nla_type(nla) != IFLA_MACVLAN_MACADDR ||
+			    nla_len(nla) != ETH_ALEN)
+				continue;
+
+			addr = nla_data(nla);
+			ret = macvlan_hash_add_source(vlan, addr);
+			if (ret)
+				return ret;
+		}
+	} else {
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -981,6 +1207,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	struct macvlan_port *port;
 	struct net_device *lowerdev;
 	int err;
+	int macmode;
 
 	if (!tb[IFLA_LINK])
 		return -EINVAL;
@@ -1034,6 +1261,15 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		if (vlan->mode != MACVLAN_MODE_SOURCE)
+			return -EINVAL;
+		macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
+		err = macvlan_changelink_sources(vlan, macmode, data);
+		if (err)
+			return err;
+	}
+
 	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
@@ -1070,6 +1306,8 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
+	if (vlan->mode == MACVLAN_MODE_SOURCE)
+		macvlan_flush_sources(vlan->port, vlan);
 	list_del_rcu(&vlan->list);
 	unregister_netdevice_queue(dev, head);
 	netdev_upper_dev_unlink(vlan->lowerdev, dev);
@@ -1082,6 +1320,8 @@ static int macvlan_changelink(struct net_device *dev,
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	enum macvlan_mode mode;
 	bool set_mode = false;
+	enum macvlan_macaddr_mode macmode;
+	int ret;
 
 	/* Validate mode, but don't set yet: setting flags may fail. */
 	if (data && data[IFLA_MACVLAN_MODE]) {
@@ -1091,6 +1331,9 @@ static int macvlan_changelink(struct net_device *dev,
 		if ((mode == MACVLAN_MODE_PASSTHRU) !=
 		    (vlan->mode == MACVLAN_MODE_PASSTHRU))
 			return -EINVAL;
+		if (vlan->mode == MACVLAN_MODE_SOURCE &&
+		    vlan->mode != mode)
+			macvlan_flush_sources(vlan->port, vlan);
 	}
 
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
@@ -1110,26 +1353,77 @@ static int macvlan_changelink(struct net_device *dev,
 	}
 	if (set_mode)
 		vlan->mode = mode;
+	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+		if (vlan->mode != MACVLAN_MODE_SOURCE)
+			return -EINVAL;
+		macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
+		ret = macvlan_changelink_sources(vlan, macmode, data);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
+static size_t macvlan_get_size_mac(const struct macvlan_dev *vlan)
+{
+	if (vlan->macaddr_count == 0)
+		return 0;
+	return nla_total_size(0) /* IFLA_MACVLAN_MACADDR_DATA */
+		+ vlan->macaddr_count * nla_total_size(sizeof(u8) * ETH_ALEN);
+}
+
 static size_t macvlan_get_size(const struct net_device *dev)
 {
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
 	return (0
 		+ nla_total_size(4) /* IFLA_MACVLAN_MODE */
 		+ nla_total_size(2) /* IFLA_MACVLAN_FLAGS */
+		+ nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */
+		+ macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */
 		);
 }
 
+static int macvlan_fill_info_macaddr(struct sk_buff *skb,
+				     const struct macvlan_dev *vlan,
+				     const int i)
+{
+	struct hlist_head *h = &vlan->port->vlan_source_hash[i];
+	struct macvlan_source_entry *entry;
+
+	hlist_for_each_entry_rcu(entry, h, hlist) {
+		if (entry->vlan != vlan)
+			continue;
+		if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr))
+			return 1;
+	}
+	return 0;
+}
+
 static int macvlan_fill_info(struct sk_buff *skb,
 				const struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	int i;
+	struct nlattr *nest;
 
 	if (nla_put_u32(skb, IFLA_MACVLAN_MODE, vlan->mode))
 		goto nla_put_failure;
 	if (nla_put_u16(skb, IFLA_MACVLAN_FLAGS, vlan->flags))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_MACVLAN_MACADDR_COUNT, vlan->macaddr_count))
+		goto nla_put_failure;
+	if (vlan->macaddr_count > 0) {
+		nest = nla_nest_start(skb, IFLA_MACVLAN_MACADDR_DATA);
+		if (nest == NULL)
+			goto nla_put_failure;
+
+		for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
+			if (macvlan_fill_info_macaddr(skb, vlan, i))
+				goto nla_put_failure;
+		}
+		nla_nest_end(skb, nest);
+	}
 	return 0;
 
 nla_put_failure:
@@ -1139,6 +1433,10 @@ nla_put_failure:
 static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
 	[IFLA_MACVLAN_MODE]  = { .type = NLA_U32 },
 	[IFLA_MACVLAN_FLAGS] = { .type = NLA_U16 },
+	[IFLA_MACVLAN_MACADDR_MODE] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+	[IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED },
+	[IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
 };
 
 int macvlan_link_register(struct rtnl_link_ops *ops)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 6b2c7cf352a5..6f6929ea8a0c 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -60,6 +60,7 @@ struct macvlan_dev {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll		*netpoll;
 #endif
+	unsigned int		macaddr_count;
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c80f95f6ee78..0bdb77e16875 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -303,6 +303,10 @@ enum {
 	IFLA_MACVLAN_UNSPEC,
 	IFLA_MACVLAN_MODE,
 	IFLA_MACVLAN_FLAGS,
+	IFLA_MACVLAN_MACADDR_MODE,
+	IFLA_MACVLAN_MACADDR,
+	IFLA_MACVLAN_MACADDR_DATA,
+	IFLA_MACVLAN_MACADDR_COUNT,
 	__IFLA_MACVLAN_MAX,
 };
 
@@ -313,6 +317,14 @@ enum macvlan_mode {
 	MACVLAN_MODE_VEPA    = 2, /* talk to other ports through ext bridge */
 	MACVLAN_MODE_BRIDGE  = 4, /* talk to bridge ports directly */
 	MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+	MACVLAN_MODE_SOURCE  = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+	MACVLAN_MACADDR_ADD,
+	MACVLAN_MACADDR_DEL,
+	MACVLAN_MACADDR_FLUSH,
+	MACVLAN_MACADDR_SET,
 };
 
 #define MACVLAN_FLAG_NOPROMISC	1
-- 
cgit v1.2.3


From 51b0a5d8c21a91801bbef9bcc8639dc0b206c6cd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 26 Sep 2014 14:35:14 +0200
Subject: netfilter: nft_reject: introduce icmp code abstraction for inet and
 bridge

This patch introduces the NFT_REJECT_ICMPX_UNREACH type which provides
an abstraction to the ICMP and ICMPv6 codes that you can use from the
inet and bridge tables, they are:

* NFT_REJECT_ICMPX_NO_ROUTE: no route to host - network unreachable
* NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable
* NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable
* NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratevely prohibited

You can still use the specific codes when restricting the rule to match
the corresponding layer 3 protocol.

I decided to not overload the existing NFT_REJECT_ICMP_UNREACH to have
different semantics depending on the table family and to allow the user
to specify ICMP family specific codes if they restrict it to the
corresponding family.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h   |  1 +
 include/net/netfilter/nft_reject.h       |  9 +--
 include/uapi/linux/netfilter/nf_tables.h | 21 +++++++
 net/bridge/netfilter/nft_reject_bridge.c | 95 ++++++++++++++++++++++++++++++--
 net/ipv4/netfilter/nft_reject_ipv4.c     |  1 -
 net/netfilter/nft_reject.c               | 37 +++++++++++++
 net/netfilter/nft_reject_inet.c          | 94 +++++++++++++++++++++++++++++--
 7 files changed, 241 insertions(+), 17 deletions(-)

(limited to 'include/uapi')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index f713b5a31d62..8ce06385a552 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -5,6 +5,7 @@
 #include <net/tcp.h>
 #include <net/route.h>
 #include <net/dst.h>
+#include <net/icmp.h>
 
 static inline void nf_send_unreach(struct sk_buff *skb_in, int code)
 {
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index 36b0da2d55bb..60fa1530006b 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -14,12 +14,7 @@ int nft_reject_init(const struct nft_ctx *ctx,
 
 int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr);
 
-void nft_reject_ipv4_eval(const struct nft_expr *expr,
-			  struct nft_data data[NFT_REG_MAX + 1],
-			  const struct nft_pktinfo *pkt);
-
-void nft_reject_ipv6_eval(const struct nft_expr *expr,
-			  struct nft_data data[NFT_REG_MAX + 1],
-			  const struct nft_pktinfo *pkt);
+int nft_reject_icmp_code(u8 code);
+int nft_reject_icmpv6_code(u8 code);
 
 #endif
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b72ccfeaf865..c26df6787fb0 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -749,12 +749,33 @@ enum nft_queue_attributes {
  *
  * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
  * @NFT_REJECT_TCP_RST: reject using TCP RST
+ * @NFT_REJECT_ICMPX_UNREACH: abstracted ICMP unreachable for bridge and inet
  */
 enum nft_reject_types {
 	NFT_REJECT_ICMP_UNREACH,
 	NFT_REJECT_TCP_RST,
+	NFT_REJECT_ICMPX_UNREACH,
 };
 
+/**
+ * enum nft_reject_code - Generic reject codes for IPv4/IPv6
+ *
+ * @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable
+ * @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable
+ * @NFT_REJECT_ICMPX_HOST_UNREACH: host unreachable
+ * @NFT_REJECT_ICMPX_ADMIN_PROHIBITED: administratively prohibited
+ *
+ * These codes are mapped to real ICMP and ICMPv6 codes.
+ */
+enum nft_reject_inet_code {
+	NFT_REJECT_ICMPX_NO_ROUTE	= 0,
+	NFT_REJECT_ICMPX_PORT_UNREACH,
+	NFT_REJECT_ICMPX_HOST_UNREACH,
+	NFT_REJECT_ICMPX_ADMIN_PROHIBITED,
+	__NFT_REJECT_ICMPX_MAX
+};
+#define NFT_REJECT_ICMPX_MAX	(__NFT_REJECT_ICMPX_MAX + 1)
+
 /**
  * enum nft_reject_attributes - nf_tables reject expression netlink attributes
  *
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index ee3ffe93e14e..a76479535df2 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -14,21 +14,106 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
 
 static void nft_reject_bridge_eval(const struct nft_expr *expr,
 				 struct nft_data data[NFT_REG_MAX + 1],
 				 const struct nft_pktinfo *pkt)
 {
+	struct nft_reject *priv = nft_expr_priv(expr);
+	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
 	switch (eth_hdr(pkt->skb)->h_proto) {
 	case htons(ETH_P_IP):
-		return nft_reject_ipv4_eval(expr, data, pkt);
+		switch (priv->type) {
+		case NFT_REJECT_ICMP_UNREACH:
+			nf_send_unreach(pkt->skb, priv->icmp_code);
+			break;
+		case NFT_REJECT_TCP_RST:
+			nf_send_reset(pkt->skb, pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_ICMPX_UNREACH:
+			nf_send_unreach(pkt->skb,
+					nft_reject_icmp_code(priv->icmp_code));
+			break;
+		}
+		break;
 	case htons(ETH_P_IPV6):
-		return nft_reject_ipv6_eval(expr, data, pkt);
+		switch (priv->type) {
+		case NFT_REJECT_ICMP_UNREACH:
+			nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+					 pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_TCP_RST:
+			nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_ICMPX_UNREACH:
+			nf_send_unreach6(net, pkt->skb,
+					 nft_reject_icmpv6_code(priv->icmp_code),
+					 pkt->ops->hooknum);
+			break;
+		}
+		break;
 	default:
 		/* No explicit way to reject this protocol, drop it. */
-		data[NFT_REG_VERDICT].verdict = NF_DROP;
 		break;
 	}
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static int nft_reject_bridge_init(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  const struct nlattr * const tb[])
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+	int icmp_code;
+
+	if (tb[NFTA_REJECT_TYPE] == NULL)
+		return -EINVAL;
+
+	priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+	case NFT_REJECT_ICMPX_UNREACH:
+		if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+			return -EINVAL;
+
+		icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+		if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+		    icmp_code > NFT_REJECT_ICMPX_MAX)
+			return -EINVAL;
+
+		priv->icmp_code = icmp_code;
+		break;
+	case NFT_REJECT_TCP_RST:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int nft_reject_bridge_dump(struct sk_buff *skb,
+				  const struct nft_expr *expr)
+{
+	const struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+		goto nla_put_failure;
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+	case NFT_REJECT_ICMPX_UNREACH:
+		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+			goto nla_put_failure;
+		break;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
 }
 
 static struct nft_expr_type nft_reject_bridge_type;
@@ -36,8 +121,8 @@ static const struct nft_expr_ops nft_reject_bridge_ops = {
 	.type		= &nft_reject_bridge_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
 	.eval		= nft_reject_bridge_eval,
-	.init		= nft_reject_init,
-	.dump		= nft_reject_dump,
+	.init		= nft_reject_bridge_init,
+	.dump		= nft_reject_bridge_dump,
 };
 
 static struct nft_expr_type nft_reject_bridge_type __read_mostly = {
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index e79718a382f2..ed33299c56d1 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -16,7 +16,6 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
 #include <net/netfilter/ipv4/nf_reject.h>
 #include <net/netfilter/nft_reject.h>
 
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index f3448c296446..ec8a456092a7 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -17,6 +17,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nft_reject.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
 
 const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
 	[NFTA_REJECT_TYPE]		= { .type = NLA_U32 },
@@ -70,5 +72,40 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMP_NET_UNREACH,
+	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMP_PORT_UNREACH,
+	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMP_HOST_UNREACH,
+	[NFT_REJECT_ICMPX_ADMIN_PROHIBITED]	= ICMP_PKT_FILTERED,
+};
+
+int nft_reject_icmp_code(u8 code)
+{
+	if (code > NFT_REJECT_ICMPX_MAX)
+		return -EINVAL;
+
+	return icmp_code_v4[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
+
+
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMPV6_NOROUTE,
+	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMPV6_PORT_UNREACH,
+	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMPV6_ADDR_UNREACH,
+	[NFT_REJECT_ICMPX_ADMIN_PROHIBITED]	= ICMPV6_ADM_PROHIBITED,
+};
+
+int nft_reject_icmpv6_code(u8 code)
+{
+	if (code > NFT_REJECT_ICMPX_MAX)
+		return -EINVAL;
+
+	return icmp_code_v6[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index b718a52a4654..7b5f9d58680a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -14,17 +14,103 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
 
 static void nft_reject_inet_eval(const struct nft_expr *expr,
 				 struct nft_data data[NFT_REG_MAX + 1],
 				 const struct nft_pktinfo *pkt)
 {
+	struct nft_reject *priv = nft_expr_priv(expr);
+	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
 	switch (pkt->ops->pf) {
 	case NFPROTO_IPV4:
-		return nft_reject_ipv4_eval(expr, data, pkt);
+		switch (priv->type) {
+		case NFT_REJECT_ICMP_UNREACH:
+			nf_send_unreach(pkt->skb, priv->icmp_code);
+			break;
+		case NFT_REJECT_TCP_RST:
+			nf_send_reset(pkt->skb, pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_ICMPX_UNREACH:
+			nf_send_unreach(pkt->skb,
+					nft_reject_icmp_code(priv->icmp_code));
+			break;
+		}
+		break;
 	case NFPROTO_IPV6:
-		return nft_reject_ipv6_eval(expr, data, pkt);
+		switch (priv->type) {
+		case NFT_REJECT_ICMP_UNREACH:
+			nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+					 pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_TCP_RST:
+			nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+			break;
+		case NFT_REJECT_ICMPX_UNREACH:
+			nf_send_unreach6(net, pkt->skb,
+					 nft_reject_icmpv6_code(priv->icmp_code),
+					 pkt->ops->hooknum);
+			break;
+		}
+		break;
+	}
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static int nft_reject_inet_init(const struct nft_ctx *ctx,
+				const struct nft_expr *expr,
+				const struct nlattr * const tb[])
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+	int icmp_code;
+
+	if (tb[NFTA_REJECT_TYPE] == NULL)
+		return -EINVAL;
+
+	priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+	case NFT_REJECT_ICMPX_UNREACH:
+		if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+			return -EINVAL;
+
+		icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+		if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+		    icmp_code > NFT_REJECT_ICMPX_MAX)
+			return -EINVAL;
+
+		priv->icmp_code = icmp_code;
+		break;
+	case NFT_REJECT_TCP_RST:
+		break;
+	default:
+		return -EINVAL;
 	}
+	return 0;
+}
+
+static int nft_reject_inet_dump(struct sk_buff *skb,
+				const struct nft_expr *expr)
+{
+	const struct nft_reject *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+		goto nla_put_failure;
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+	case NFT_REJECT_ICMPX_UNREACH:
+		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+			goto nla_put_failure;
+		break;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
 }
 
 static struct nft_expr_type nft_reject_inet_type;
@@ -32,8 +118,8 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
 	.type		= &nft_reject_inet_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
 	.eval		= nft_reject_inet_eval,
-	.init		= nft_reject_init,
-	.dump		= nft_reject_dump,
+	.init		= nft_reject_inet_init,
+	.dump		= nft_reject_inet_dump,
 };
 
 static struct nft_expr_type nft_reject_inet_type __read_mostly = {
-- 
cgit v1.2.3


From dba4b74d2da8798626e2b702ad3f452671e335f7 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Wed, 1 Oct 2014 15:05:25 +0300
Subject: wil6210: atomic I/O for the card memory

Introduce netdev IOCTLs, to be used by the debug tools.

Allows to read/write single dword value or
memory block, aligned to dword
Different address modes supported:
- BAR offset
- Firmware "linker" address
- target's AHB bus

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 MAINTAINERS                                |   1 +
 drivers/net/wireless/ath/wil6210/Makefile  |   1 +
 drivers/net/wireless/ath/wil6210/ioctl.c   | 173 +++++++++++++++++++++++++++++
 drivers/net/wireless/ath/wil6210/netdev.c  |  12 ++
 drivers/net/wireless/ath/wil6210/wil6210.h |   2 +
 include/uapi/linux/wil6210_uapi.h          |  87 +++++++++++++++
 6 files changed, 276 insertions(+)
 create mode 100644 drivers/net/wireless/ath/wil6210/ioctl.c
 create mode 100644 include/uapi/linux/wil6210_uapi.h

(limited to 'include/uapi')

diff --git a/MAINTAINERS b/MAINTAINERS
index 482888d80431..66de6b2923d4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1611,6 +1611,7 @@ L:	wil6210@qca.qualcomm.com
 S:	Supported
 W:	http://wireless.kernel.org/en/users/Drivers/wil6210
 F:	drivers/net/wireless/ath/wil6210/
+F:	include/uapi/linux/wil6210_uapi.h
 
 CARL9170 LINUX COMMUNITY WIRELESS DRIVER
 M:	Christian Lamparter <chunkeey@googlemail.com>
diff --git a/drivers/net/wireless/ath/wil6210/Makefile b/drivers/net/wireless/ath/wil6210/Makefile
index 05f29a6b1ba8..8ad4b5f97e04 100644
--- a/drivers/net/wireless/ath/wil6210/Makefile
+++ b/drivers/net/wireless/ath/wil6210/Makefile
@@ -10,6 +10,7 @@ wil6210-y += interrupt.o
 wil6210-y += txrx.o
 wil6210-y += debug.o
 wil6210-y += rx_reorder.o
+wil6210-y += ioctl.o
 wil6210-y += fw.o
 wil6210-$(CONFIG_WIL6210_TRACING) += trace.o
 wil6210-y += wil_platform.o
diff --git a/drivers/net/wireless/ath/wil6210/ioctl.c b/drivers/net/wireless/ath/wil6210/ioctl.c
new file mode 100644
index 000000000000..e9c0673819c6
--- /dev/null
+++ b/drivers/net/wireless/ath/wil6210/ioctl.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2014 Qualcomm Atheros, Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/uaccess.h>
+
+#include "wil6210.h"
+#include <uapi/linux/wil6210_uapi.h>
+
+#define wil_hex_dump_ioctl(prefix_str, buf, len) \
+	print_hex_dump_debug("DBG[IOC ]" prefix_str, \
+			     DUMP_PREFIX_OFFSET, 16, 1, buf, len, true)
+#define wil_dbg_ioctl(wil, fmt, arg...) wil_dbg(wil, "DBG[IOC ]" fmt, ##arg)
+
+static void __iomem *wil_ioc_addr(struct wil6210_priv *wil, uint32_t addr,
+				  uint32_t size, enum wil_memio_op op)
+{
+	void __iomem *a;
+	u32 off;
+
+	switch (op & wil_mmio_addr_mask) {
+	case wil_mmio_addr_linker:
+		a = wmi_buffer(wil, cpu_to_le32(addr));
+		break;
+	case wil_mmio_addr_ahb:
+		a = wmi_addr(wil, addr);
+		break;
+	case wil_mmio_addr_bar:
+		a = wmi_addr(wil, addr + WIL6210_FW_HOST_OFF);
+		break;
+	default:
+		wil_err(wil, "Unsupported address mode, op = 0x%08x\n", op);
+		return NULL;
+	}
+
+	off = a - wil->csr;
+	if (size >= WIL6210_MEM_SIZE - off) {
+		wil_err(wil, "Requested block does not fit into memory: "
+			"off = 0x%08x size = 0x%08x\n", off, size);
+		return NULL;
+	}
+
+	return a;
+}
+
+static int wil_ioc_memio_dword(struct wil6210_priv *wil, void __user *data)
+{
+	struct wil_memio io;
+	void __iomem *a;
+	bool need_copy = false;
+
+	if (copy_from_user(&io, data, sizeof(io)))
+		return -EFAULT;
+
+	wil_dbg_ioctl(wil, "IO: addr = 0x%08x val = 0x%08x op = 0x%08x\n",
+		      io.addr, io.val, io.op);
+
+	a = wil_ioc_addr(wil, io.addr, sizeof(u32), io.op);
+	if (!a) {
+		wil_err(wil, "invalid address 0x%08x, op = 0x%08x\n", io.addr,
+			io.op);
+		return -EINVAL;
+	}
+	/* operation */
+	switch (io.op & wil_mmio_op_mask) {
+	case wil_mmio_read:
+		io.val = ioread32(a);
+		need_copy = true;
+		break;
+	case wil_mmio_write:
+		iowrite32(io.val, a);
+		wmb(); /* make sure write propagated to HW */
+		break;
+	default:
+		wil_err(wil, "Unsupported operation, op = 0x%08x\n", io.op);
+		return -EINVAL;
+	}
+
+	if (need_copy) {
+		wil_dbg_ioctl(wil, "IO done: addr = 0x%08x"
+			      " val = 0x%08x op = 0x%08x\n",
+			      io.addr, io.val, io.op);
+		if (copy_to_user(data, &io, sizeof(io)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int wil_ioc_memio_block(struct wil6210_priv *wil, void __user *data)
+{
+	struct wil_memio_block io;
+	void *block;
+	void __iomem *a;
+	int rc = 0;
+
+	if (copy_from_user(&io, data, sizeof(io)))
+		return -EFAULT;
+
+	wil_dbg_ioctl(wil, "IO: addr = 0x%08x size = 0x%08x op = 0x%08x\n",
+		      io.addr, io.size, io.op);
+
+	/* size */
+	if (io.size % 4) {
+		wil_err(wil, "size is not multiple of 4:  0x%08x\n", io.size);
+		return -EINVAL;
+	}
+
+	a = wil_ioc_addr(wil, io.addr, io.size, io.op);
+	if (!a) {
+		wil_err(wil, "invalid address 0x%08x, op = 0x%08x\n", io.addr,
+			io.op);
+		return -EINVAL;
+	}
+
+	block = kmalloc(io.size, GFP_USER);
+	if (!block)
+		return -ENOMEM;
+
+	/* operation */
+	switch (io.op & wil_mmio_op_mask) {
+	case wil_mmio_read:
+		wil_memcpy_fromio_32(block, a, io.size);
+		wil_hex_dump_ioctl("Read  ", block, io.size);
+		if (copy_to_user(io.block, block, io.size)) {
+			rc = -EFAULT;
+			goto out_free;
+		}
+		break;
+	case wil_mmio_write:
+		if (copy_from_user(block, io.block, io.size)) {
+			rc = -EFAULT;
+			goto out_free;
+		}
+		wil_memcpy_toio_32(a, block, io.size);
+		wmb(); /* make sure write propagated to HW */
+		wil_hex_dump_ioctl("Write ", block, io.size);
+		break;
+	default:
+		wil_err(wil, "Unsupported operation, op = 0x%08x\n", io.op);
+		rc = -EINVAL;
+		break;
+	}
+
+out_free:
+	kfree(block);
+	return rc;
+}
+
+int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd)
+{
+	switch (cmd) {
+	case WIL_IOCTL_MEMIO:
+		return wil_ioc_memio_dword(wil, data);
+	case WIL_IOCTL_MEMIO_BLOCK:
+		return wil_ioc_memio_block(wil, data);
+	default:
+		wil_dbg_ioctl(wil, "Unsupported IOCTL 0x%04x\n", cmd);
+		return -ENOIOCTLCMD;
+	}
+}
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index c3f0ddfaa5da..239965106c05 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -52,6 +52,17 @@ static int wil_change_mtu(struct net_device *ndev, int new_mtu)
 	return 0;
 }
 
+static int wil_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
+{
+	struct wil6210_priv *wil = ndev_to_wil(ndev);
+
+	int ret = wil_ioctl(wil, ifr->ifr_data, cmd);
+
+	wil_dbg_misc(wil, "ioctl(0x%04x) -> %d\n", cmd, ret);
+
+	return ret;
+}
+
 static const struct net_device_ops wil_netdev_ops = {
 	.ndo_open		= wil_open,
 	.ndo_stop		= wil_stop,
@@ -59,6 +70,7 @@ static const struct net_device_ops wil_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_change_mtu		= wil_change_mtu,
+	.ndo_do_ioctl		= wil_do_ioctl,
 };
 
 static int wil6210_netdev_poll_rx(struct napi_struct *napi, int budget)
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 2991609885f7..61cceca155a2 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -595,5 +595,7 @@ void wil6210_unmask_irq_rx(struct wil6210_priv *wil);
 
 int wil_iftype_nl2wmi(enum nl80211_iftype type);
 
+int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd);
 int wil_request_firmware(struct wil6210_priv *wil, const char *name);
+
 #endif /* __WIL6210_H__ */
diff --git a/include/uapi/linux/wil6210_uapi.h b/include/uapi/linux/wil6210_uapi.h
new file mode 100644
index 000000000000..6a3cddd156c4
--- /dev/null
+++ b/include/uapi/linux/wil6210_uapi.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014 Qualcomm Atheros, Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __WIL6210_UAPI_H__
+#define __WIL6210_UAPI_H__
+
+#if !defined(__KERNEL__)
+#define __user
+#endif
+
+#include <linux/sockios.h>
+
+/* Numbers SIOCDEVPRIVATE and SIOCDEVPRIVATE + 1
+ * are used by Android devices to implement PNO (preferred network offload).
+ * Albeit it is temporary solution, use different numbers to avoid conflicts
+ */
+
+/**
+ * Perform 32-bit I/O operation to the card memory
+ *
+ * User code should arrange data in memory like this:
+ *
+ *	struct wil_memio io;
+ *	struct ifreq ifr = {
+ *		.ifr_data = &io,
+ *	};
+ */
+#define WIL_IOCTL_MEMIO (SIOCDEVPRIVATE + 2)
+
+/**
+ * Perform block I/O operation to the card memory
+ *
+ * User code should arrange data in memory like this:
+ *
+ *	void *buf;
+ *	struct wil_memio_block io = {
+ *		.block = buf,
+ *	};
+ *	struct ifreq ifr = {
+ *		.ifr_data = &io,
+ *	};
+ */
+#define WIL_IOCTL_MEMIO_BLOCK (SIOCDEVPRIVATE + 3)
+
+/**
+ * operation to perform
+ *
+ * @wil_mmio_op_mask - bits defining operation,
+ * @wil_mmio_addr_mask - bits defining addressing mode
+ */
+enum wil_memio_op {
+	wil_mmio_read = 0,
+	wil_mmio_write = 1,
+	wil_mmio_op_mask = 0xff,
+	wil_mmio_addr_linker = 0 << 8,
+	wil_mmio_addr_ahb = 1 << 8,
+	wil_mmio_addr_bar = 2 << 8,
+	wil_mmio_addr_mask = 0xff00,
+};
+
+struct wil_memio {
+	uint32_t op; /* enum wil_memio_op */
+	uint32_t addr; /* should be 32-bit aligned */
+	uint32_t val;
+};
+
+struct wil_memio_block {
+	uint32_t op; /* enum wil_memio_op */
+	uint32_t addr; /* should be 32-bit aligned */
+	uint32_t size; /* should be multiple of 4 */
+	void __user *block; /* block address */
+};
+
+#endif /* __WIL6210_UAPI_H__ */
-- 
cgit v1.2.3


From 7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Wed, 1 Oct 2014 16:07:05 -0700
Subject: target: Add a user-passthrough backstore

Add a LIO storage engine that presents commands to userspace for execution.
This would allow more complex backstores to be implemented out-of-kernel,
and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?)
possible.

It uses a mmap()able UIO device per LUN to share a command ring and data
area. The commands are raw SCSI CDBs and iovs for in/out data. The command
ring is also reused for returning scsi command status and optional sense
data.

This implementation is based on Shaohua Li's earlier version but heavily
modified. Differences include:

* Shared memory allocated by kernel, not locked-down user pages
* Single ring for command request and response
* Offsets instead of embedded pointers
* Generic SCSI CDB passthrough instead of per-cmd specialization in ring
  format.
* Uses UIO device instead of anon_file passed in mailbox.
* Optional in-kernel handling of some commands.

The main reason for these differences is to permit greater resiliency
if the user process dies or hangs.

Things not yet implemented (on purpose):

* Zero copy. The data area is flexible enough to allow page flipping or
  backend-allocated pages to be used by fabrics, but it's not clear these
  are performance wins. Can come later.
* Out-of-order command completion by userspace. Possible to add by just
  allowing userspace to change cmd_id in rsp cmd entries, but currently
  not supported.
* No locks between kernel cmd submission and completion routines. Sounds
  like it's possible, but this can come later.
* Sparse allocation of mmaped area. Current code vmallocs the whole thing.
  If the mapped area was larger and not fully mapped then the driver would
  have more freedom to change cmd and data area sizes based on demand.

Current code open issues:

* The use of idrs may be overkill -- we maybe can replace them with a
  simple counter to generate cmd_ids, and a hash table to get a cmd_id's
  associated pointer.
* Use of a free-running counter for cmd ring instead of explicit modulo
  math. This would require power-of-2 cmd ring size.

(Add kconfig depends NET - Randy)

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/Kconfig                 |    7 +
 drivers/target/Makefile                |    1 +
 drivers/target/target_core_transport.c |    4 +
 drivers/target/target_core_user.c      | 1163 ++++++++++++++++++++++++++++++++
 include/uapi/linux/Kbuild              |    1 +
 include/uapi/linux/target_core_user.h  |  142 ++++
 6 files changed, 1318 insertions(+)
 create mode 100644 drivers/target/target_core_user.c
 create mode 100644 include/uapi/linux/target_core_user.h

(limited to 'include/uapi')

diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index dc2d84ac5a0e..81d44c477a5b 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,6 +31,13 @@ config TCM_PSCSI
 	Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
 	passthrough access to Linux/SCSI device
 
+config TCM_USER
+	tristate "TCM/USER Subsystem Plugin for Linux"
+	depends on UIO && NET
+	help
+	Say Y here to enable the TCM/USER subsystem plugin for a userspace
+	process to handle requests
+
 source "drivers/target/loopback/Kconfig"
 source "drivers/target/tcm_fc/Kconfig"
 source "drivers/target/iscsi/Kconfig"
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 85b012d2f89b..bbb4a7d638ef 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TARGET_CORE)	+= target_core_mod.o
 obj-$(CONFIG_TCM_IBLOCK)	+= target_core_iblock.o
 obj-$(CONFIG_TCM_FILEIO)	+= target_core_file.o
 obj-$(CONFIG_TCM_PSCSI)		+= target_core_pscsi.o
+obj-$(CONFIG_TCM_USER)		+= target_core_user.o
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET)	+= loopback/
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9700ea125268..9ea0d5f03f7a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -232,6 +232,10 @@ void transport_subsystem_check_init(void)
 	if (ret != 0)
 		pr_err("Unable to load target_core_pscsi\n");
 
+	ret = request_module("target_core_user");
+	if (ret != 0)
+		pr_err("Unable to load target_core_user\n");
+
 	sub_api_initialized = 1;
 }
 
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
new file mode 100644
index 000000000000..6608ecf94570
--- /dev/null
+++ b/drivers/target/target_core_user.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <linux/timer.h>
+#include <linux/parser.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <linux/uio_driver.h>
+#include <net/genetlink.h>
+#include <target/target_core_base.h>
+#include <target/target_core_fabric.h>
+#include <target/target_core_backend.h>
+#include <linux/target_core_user.h>
+
+/*
+ * Define a shared-memory interface for LIO to pass SCSI commands and
+ * data to userspace for processing. This is to allow backends that
+ * are too complex for in-kernel support to be possible.
+ *
+ * It uses the UIO framework to do a lot of the device-creation and
+ * introspection work for us.
+ *
+ * See the .h file for how the ring is laid out. Note that while the
+ * command ring is defined, the particulars of the data area are
+ * not. Offset values in the command entry point to other locations
+ * internal to the mmap()ed area. There is separate space outside the
+ * command ring for data buffers. This leaves maximum flexibility for
+ * moving buffer allocations, or even page flipping or other
+ * allocation techniques, without altering the command ring layout.
+ *
+ * SECURITY:
+ * The user process must be assumed to be malicious. There's no way to
+ * prevent it breaking the command ring protocol if it wants, but in
+ * order to prevent other issues we must only ever read *data* from
+ * the shared memory area, not offsets or sizes. This applies to
+ * command ring entries as well as the mailbox. Extra code needed for
+ * this may have a 'UAM' comment.
+ */
+
+
+#define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
+
+#define CMDR_SIZE (16 * 4096)
+#define DATA_SIZE (257 * 4096)
+
+#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
+
+static struct device *tcmu_root_device;
+
+struct tcmu_hba {
+	u32 host_id;
+};
+
+/* User wants all cmds or just some */
+enum passthru_level {
+	TCMU_PASS_ALL = 0,
+	TCMU_PASS_IO,
+	TCMU_PASS_INVALID,
+};
+
+#define TCMU_CONFIG_LEN 256
+
+struct tcmu_dev {
+	struct se_device se_dev;
+
+	char *name;
+	struct se_hba *hba;
+
+#define TCMU_DEV_BIT_OPEN 0
+#define TCMU_DEV_BIT_BROKEN 1
+	unsigned long flags;
+	enum passthru_level pass_level;
+
+	struct uio_info uio_info;
+
+	struct tcmu_mailbox *mb_addr;
+	size_t dev_size;
+	u32 cmdr_size;
+	u32 cmdr_last_cleaned;
+	/* Offset of data ring from start of mb */
+	size_t data_off;
+	size_t data_size;
+	/* Ring head + tail values. */
+	/* Must add data_off and mb_addr to get the address */
+	size_t data_head;
+	size_t data_tail;
+
+	wait_queue_head_t wait_cmdr;
+	/* TODO should this be a mutex? */
+	spinlock_t cmdr_lock;
+
+	struct idr commands;
+	spinlock_t commands_lock;
+
+	struct timer_list timeout;
+
+	char dev_config[TCMU_CONFIG_LEN];
+};
+
+#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
+
+#define CMDR_OFF sizeof(struct tcmu_mailbox)
+
+struct tcmu_cmd {
+	struct se_cmd *se_cmd;
+	struct tcmu_dev *tcmu_dev;
+
+	uint16_t cmd_id;
+
+	/* Can't use se_cmd->data_length when cleaning up expired cmds, because if
+	   cmd has been completed then accessing se_cmd is off limits */
+	size_t data_length;
+
+	unsigned long deadline;
+
+#define TCMU_CMD_BIT_EXPIRED 0
+	unsigned long flags;
+};
+
+static struct kmem_cache *tcmu_cmd_cache;
+
+/* multicast group */
+enum tcmu_multicast_groups {
+	TCMU_MCGRP_CONFIG,
+};
+
+static const struct genl_multicast_group tcmu_mcgrps[] = {
+	[TCMU_MCGRP_CONFIG] = { .name = "config", },
+};
+
+/* Our generic netlink family */
+static struct genl_family tcmu_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "TCM-USER",
+	.version = 1,
+	.maxattr = TCMU_ATTR_MAX,
+	.mcgrps = tcmu_mcgrps,
+	.n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
+};
+
+static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
+{
+	struct se_device *se_dev = se_cmd->se_dev;
+	struct tcmu_dev *udev = TCMU_DEV(se_dev);
+	struct tcmu_cmd *tcmu_cmd;
+	int cmd_id;
+
+	tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL);
+	if (!tcmu_cmd)
+		return NULL;
+
+	tcmu_cmd->se_cmd = se_cmd;
+	tcmu_cmd->tcmu_dev = udev;
+	tcmu_cmd->data_length = se_cmd->data_length;
+
+	tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&udev->commands_lock);
+	cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0,
+		USHRT_MAX, GFP_NOWAIT);
+	spin_unlock_irq(&udev->commands_lock);
+	idr_preload_end();
+
+	if (cmd_id < 0) {
+		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+		return NULL;
+	}
+	tcmu_cmd->cmd_id = cmd_id;
+
+	return tcmu_cmd;
+}
+
+static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
+{
+	unsigned long offset = (unsigned long) vaddr & ~PAGE_MASK;
+
+	size = round_up(size+offset, PAGE_SIZE);
+	vaddr -= offset;
+
+	while (size) {
+		flush_dcache_page(virt_to_page(vaddr));
+		size -= PAGE_SIZE;
+	}
+}
+
+/*
+ * Some ring helper functions. We don't assume size is a power of 2 so
+ * we can't use circ_buf.h.
+ */
+static inline size_t spc_used(size_t head, size_t tail, size_t size)
+{
+	int diff = head - tail;
+
+	if (diff >= 0)
+		return diff;
+	else
+		return size + diff;
+}
+
+static inline size_t spc_free(size_t head, size_t tail, size_t size)
+{
+	/* Keep 1 byte unused or we can't tell full from empty */
+	return (size - spc_used(head, tail, size) - 1);
+}
+
+static inline size_t head_to_end(size_t head, size_t size)
+{
+	return size - head;
+}
+
+#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
+
+/*
+ * We can't queue a command until we have space available on the cmd ring *and* space
+ * space avail on the data ring.
+ *
+ * Called with ring lock held.
+ */
+static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_needed, size_t data_needed)
+{
+	struct tcmu_mailbox *mb = udev->mb_addr;
+	size_t space;
+	u32 cmd_head;
+
+	tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+
+	space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
+	if (space < cmd_needed) {
+		pr_debug("no cmd space: %u %u %u\n", cmd_head,
+		       udev->cmdr_last_cleaned, udev->cmdr_size);
+		return false;
+	}
+
+	space = spc_free(udev->data_head, udev->data_tail, udev->data_size);
+	if (space < data_needed) {
+		pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
+		       udev->data_tail, udev->data_size);
+		return false;
+	}
+
+	return true;
+}
+
+static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
+{
+	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t base_command_size, command_size;
+	size_t cmdr_space_needed;
+	struct tcmu_mailbox *mb;
+	size_t pad_size;
+	struct tcmu_cmd_entry *entry;
+	int i;
+	struct scatterlist *sg;
+	struct iovec *iov;
+	int iov_cnt = 0;
+	uint32_t cmd_head;
+	uint64_t cdb_off;
+
+	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
+		return -EINVAL;
+
+	/*
+	 * Must be a certain minimum size for response sense info, but
+	 * also may be larger if the iov array is large.
+	 *
+	 * iovs = sgl_nents+1, for end-of-ring case, plus another 1
+	 * b/c size == offsetof one-past-element.
+	*/
+	base_command_size = max(offsetof(struct tcmu_cmd_entry,
+					 req.iov[se_cmd->t_data_nents + 2]),
+				sizeof(struct tcmu_cmd_entry));
+	command_size = base_command_size
+		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
+
+	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
+
+	spin_lock_irq(&udev->cmdr_lock);
+
+	mb = udev->mb_addr;
+	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+	if ((command_size > (udev->cmdr_size / 2))
+	    || tcmu_cmd->data_length > (udev->data_size - 1))
+		pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
+			"cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
+			udev->cmdr_size, udev->data_size);
+
+	/*
+	 * Cmd end-of-ring space is too small so we need space for a NOP plus
+	 * original cmd - cmds are internally contiguous.
+	 */
+	if (head_to_end(cmd_head, udev->cmdr_size) >= command_size)
+		pad_size = 0;
+	else
+		pad_size = head_to_end(cmd_head, udev->cmdr_size);
+	cmdr_space_needed = command_size + pad_size;
+
+	while (!is_ring_space_avail(udev, cmdr_space_needed, tcmu_cmd->data_length)) {
+		int ret;
+		DEFINE_WAIT(__wait);
+
+		prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
+
+		pr_debug("sleeping for ring space\n");
+		spin_unlock_irq(&udev->cmdr_lock);
+		ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
+		finish_wait(&udev->wait_cmdr, &__wait);
+		if (!ret) {
+			pr_warn("tcmu: command timed out\n");
+			return -ETIMEDOUT;
+		}
+
+		spin_lock_irq(&udev->cmdr_lock);
+
+		/* We dropped cmdr_lock, cmd_head is stale */
+		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+	}
+
+	if (pad_size) {
+		entry = (void *) mb + CMDR_OFF + cmd_head;
+		tcmu_flush_dcache_range(entry, sizeof(*entry));
+		tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
+		tcmu_hdr_set_len(&entry->hdr, pad_size);
+
+		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
+
+		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+		WARN_ON(cmd_head != 0);
+	}
+
+	entry = (void *) mb + CMDR_OFF + cmd_head;
+	tcmu_flush_dcache_range(entry, sizeof(*entry));
+	tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
+	tcmu_hdr_set_len(&entry->hdr, command_size);
+	entry->cmd_id = tcmu_cmd->cmd_id;
+
+	/*
+	 * Fix up iovecs, and handle if allocation in data ring wrapped.
+	 */
+	iov = &entry->req.iov[0];
+	for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
+		size_t copy_bytes = min((size_t)sg->length,
+				     head_to_end(udev->data_head, udev->data_size));
+		void *from = kmap_atomic(sg_page(sg)) + sg->offset;
+		void *to = (void *) mb + udev->data_off + udev->data_head;
+
+		if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) {
+			memcpy(to, from, copy_bytes);
+			tcmu_flush_dcache_range(to, copy_bytes);
+		}
+
+		/* Even iov_base is relative to mb_addr */
+		iov->iov_len = copy_bytes;
+		iov->iov_base = (void *) udev->data_off + udev->data_head;
+		iov_cnt++;
+		iov++;
+
+		UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
+
+		/* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
+		if (sg->length != copy_bytes) {
+			from += copy_bytes;
+			copy_bytes = sg->length - copy_bytes;
+
+			iov->iov_len = copy_bytes;
+			iov->iov_base = (void *) udev->data_off + udev->data_head;
+
+			if (se_cmd->data_direction == DMA_TO_DEVICE) {
+				to = (void *) mb + udev->data_off + udev->data_head;
+				memcpy(to, from, copy_bytes);
+				tcmu_flush_dcache_range(to, copy_bytes);
+			}
+
+			iov_cnt++;
+			iov++;
+
+			UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
+		}
+
+		kunmap_atomic(from);
+	}
+	entry->req.iov_cnt = iov_cnt;
+
+	/* All offsets relative to mb_addr, not start of entry! */
+	cdb_off = CMDR_OFF + cmd_head + base_command_size;
+	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
+	entry->req.cdb_off = cdb_off;
+	tcmu_flush_dcache_range(entry, sizeof(*entry));
+
+	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
+	tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+	spin_unlock_irq(&udev->cmdr_lock);
+
+	/* TODO: only if FLUSH and FUA? */
+	uio_event_notify(&udev->uio_info);
+
+	mod_timer(&udev->timeout,
+		round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
+
+	return 0;
+}
+
+static int tcmu_queue_cmd(struct se_cmd *se_cmd)
+{
+	struct se_device *se_dev = se_cmd->se_dev;
+	struct tcmu_dev *udev = TCMU_DEV(se_dev);
+	struct tcmu_cmd *tcmu_cmd;
+	int ret;
+
+	tcmu_cmd = tcmu_alloc_cmd(se_cmd);
+	if (!tcmu_cmd)
+		return -ENOMEM;
+
+	ret = tcmu_queue_cmd_ring(tcmu_cmd);
+	if (ret < 0) {
+		pr_err("TCMU: Could not queue command\n");
+		spin_lock_irq(&udev->commands_lock);
+		idr_remove(&udev->commands, tcmu_cmd->cmd_id);
+		spin_unlock_irq(&udev->commands_lock);
+
+		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
+	}
+
+	return ret;
+}
+
+static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
+{
+	struct se_cmd *se_cmd = cmd->se_cmd;
+	struct tcmu_dev *udev = cmd->tcmu_dev;
+
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		/* cmd has been completed already from timeout, just reclaim data
+		   ring space */
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		return;
+	}
+
+	if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
+		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
+			       se_cmd->scsi_sense_length);
+
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+	}
+	else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
+		struct scatterlist *sg;
+		int i;
+
+		/* It'd be easier to look at entry's iovec again, but UAM */
+		for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
+			size_t copy_bytes;
+			void *to;
+			void *from;
+
+			copy_bytes = min((size_t)sg->length,
+					 head_to_end(udev->data_tail, udev->data_size));
+
+			to = kmap_atomic(sg_page(sg)) + sg->offset;
+			WARN_ON(sg->length + sg->offset > PAGE_SIZE);
+			from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
+			tcmu_flush_dcache_range(from, copy_bytes);
+			memcpy(to, from, copy_bytes);
+
+			UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
+
+			/* Uh oh, wrapped the data buffer for this sg's data */
+			if (sg->length != copy_bytes) {
+				from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
+				WARN_ON(udev->data_tail);
+				to += copy_bytes;
+				copy_bytes = sg->length - copy_bytes;
+				tcmu_flush_dcache_range(from, copy_bytes);
+				memcpy(to, from, copy_bytes);
+
+				UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
+			}
+
+			kunmap_atomic(to);
+		}
+
+	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+	} else {
+		pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction);
+	}
+
+	target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
+	cmd->se_cmd = NULL;
+
+	kmem_cache_free(tcmu_cmd_cache, cmd);
+}
+
+static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
+{
+	struct tcmu_mailbox *mb;
+	LIST_HEAD(cpl_cmds);
+	unsigned long flags;
+	int handled = 0;
+
+	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
+		pr_err("ring broken, not handling completions\n");
+		return 0;
+	}
+
+	spin_lock_irqsave(&udev->cmdr_lock, flags);
+
+	mb = udev->mb_addr;
+	tcmu_flush_dcache_range(mb, sizeof(*mb));
+
+	while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) {
+
+		struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned;
+		struct tcmu_cmd *cmd;
+
+		tcmu_flush_dcache_range(entry, sizeof(*entry));
+
+		if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
+			UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+			continue;
+		}
+		WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
+
+		spin_lock(&udev->commands_lock);
+		cmd = idr_find(&udev->commands, entry->cmd_id);
+		if (cmd)
+			idr_remove(&udev->commands, cmd->cmd_id);
+		spin_unlock(&udev->commands_lock);
+
+		if (!cmd) {
+			pr_err("cmd_id not found, ring is broken\n");
+			set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
+			break;
+		}
+
+		tcmu_handle_completion(cmd, entry);
+
+		UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+
+		handled++;
+	}
+
+	if (mb->cmd_tail == mb->cmd_head)
+		del_timer(&udev->timeout); /* no more pending cmds */
+
+	spin_unlock_irqrestore(&udev->cmdr_lock, flags);
+
+	wake_up(&udev->wait_cmdr);
+
+	return handled;
+}
+
+static int tcmu_check_expired_cmd(int id, void *p, void *data)
+{
+	struct tcmu_cmd *cmd = p;
+
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+		return 0;
+
+	if (!time_after(cmd->deadline, jiffies))
+		return 0;
+
+	set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
+	target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION);
+	cmd->se_cmd = NULL;
+
+	kmem_cache_free(tcmu_cmd_cache, cmd);
+
+	return 0;
+}
+
+static void tcmu_device_timedout(unsigned long data)
+{
+	struct tcmu_dev *udev = (struct tcmu_dev *)data;
+	unsigned long flags;
+	int handled;
+
+	handled = tcmu_handle_completions(udev);
+
+	pr_warn("%d completions handled from timeout\n", handled);
+
+	spin_lock_irqsave(&udev->commands_lock, flags);
+	idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
+	spin_unlock_irqrestore(&udev->commands_lock, flags);
+
+	/*
+	 * We don't need to wakeup threads on wait_cmdr since they have their
+	 * own timeout.
+	 */
+}
+
+static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
+{
+	struct tcmu_hba *tcmu_hba;
+
+	tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL);
+	if (!tcmu_hba)
+		return -ENOMEM;
+
+	tcmu_hba->host_id = host_id;
+	hba->hba_ptr = tcmu_hba;
+
+	return 0;
+}
+
+static void tcmu_detach_hba(struct se_hba *hba)
+{
+	kfree(hba->hba_ptr);
+	hba->hba_ptr = NULL;
+}
+
+static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
+{
+	struct tcmu_dev *udev;
+
+	udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
+	if (!udev)
+		return NULL;
+
+	udev->name = kstrdup(name, GFP_KERNEL);
+	if (!udev->name) {
+		kfree(udev);
+		return NULL;
+	}
+
+	udev->hba = hba;
+
+	init_waitqueue_head(&udev->wait_cmdr);
+	spin_lock_init(&udev->cmdr_lock);
+
+	idr_init(&udev->commands);
+	spin_lock_init(&udev->commands_lock);
+
+	setup_timer(&udev->timeout, tcmu_device_timedout,
+		(unsigned long)udev);
+
+	udev->pass_level = TCMU_PASS_ALL;
+
+	return &udev->se_dev;
+}
+
+static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
+{
+	struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info);
+
+	tcmu_handle_completions(tcmu_dev);
+
+	return 0;
+}
+
+/*
+ * mmap code from uio.c. Copied here because we want to hook mmap()
+ * and this stuff must come along.
+ */
+static int tcmu_find_mem_index(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+	struct uio_info *info = &udev->uio_info;
+
+	if (vma->vm_pgoff < MAX_UIO_MAPS) {
+		if (info->mem[vma->vm_pgoff].size == 0)
+			return -1;
+		return (int)vma->vm_pgoff;
+	}
+	return -1;
+}
+
+static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+	struct uio_info *info = &udev->uio_info;
+	struct page *page;
+	unsigned long offset;
+	void *addr;
+
+	int mi = tcmu_find_mem_index(vma);
+	if (mi < 0)
+		return VM_FAULT_SIGBUS;
+
+	/*
+	 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
+	 * to use mem[N].
+	 */
+	offset = (vmf->pgoff - mi) << PAGE_SHIFT;
+
+	addr = (void *)(unsigned long)info->mem[mi].addr + offset;
+	if (info->mem[mi].memtype == UIO_MEM_LOGICAL)
+		page = virt_to_page(addr);
+	else
+		page = vmalloc_to_page(addr);
+	get_page(page);
+	vmf->page = page;
+	return 0;
+}
+
+static const struct vm_operations_struct tcmu_vm_ops = {
+	.fault = tcmu_vma_fault,
+};
+
+static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_ops = &tcmu_vm_ops;
+
+	vma->vm_private_data = udev;
+
+	/* Ensure the mmap is exactly the right size */
+	if (vma_pages(vma) != (TCMU_RING_SIZE >> PAGE_SHIFT))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int tcmu_open(struct uio_info *info, struct inode *inode)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+
+	/* O_EXCL not supported for char devs, so fake it? */
+	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
+		return -EBUSY;
+
+	pr_debug("open\n");
+
+	return 0;
+}
+
+static int tcmu_release(struct uio_info *info, struct inode *inode)
+{
+	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
+
+	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
+
+	pr_debug("close\n");
+
+	return 0;
+}
+
+static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
+{
+	struct sk_buff *skb;
+	void *msg_header;
+	int ret;
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd);
+	if (!msg_header) {
+		nlmsg_free(skb);
+		return -ENOMEM;
+	}
+
+	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name);
+
+	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor);
+
+	ret = genlmsg_end(skb, msg_header);
+	if (ret < 0) {
+		nlmsg_free(skb);
+		return ret;
+	}
+
+	ret = genlmsg_multicast(&tcmu_genl_family, skb, 0,
+				TCMU_MCGRP_CONFIG, GFP_KERNEL);
+
+	/* We don't care if no one is listening */
+	if (ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+
+static int tcmu_configure_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	struct tcmu_hba *hba = udev->hba->hba_ptr;
+	struct uio_info *info;
+	struct tcmu_mailbox *mb;
+	size_t size;
+	size_t used;
+	int ret = 0;
+	char *str;
+
+	info = &udev->uio_info;
+
+	size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
+			udev->dev_config);
+	size += 1; /* for \0 */
+	str = kmalloc(size, GFP_KERNEL);
+	if (!str)
+		return -ENOMEM;
+
+	used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
+
+	if (udev->dev_config[0])
+		snprintf(str + used, size - used, "/%s", udev->dev_config);
+
+	info->name = str;
+
+	udev->mb_addr = vzalloc(TCMU_RING_SIZE);
+	if (!udev->mb_addr) {
+		ret = -ENOMEM;
+		goto err_vzalloc;
+	}
+
+	/* mailbox fits in first part of CMDR space */
+	udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
+	udev->data_off = CMDR_SIZE;
+	udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
+
+	mb = udev->mb_addr;
+	mb->version = 1;
+	mb->cmdr_off = CMDR_OFF;
+	mb->cmdr_size = udev->cmdr_size;
+
+	WARN_ON(!PAGE_ALIGNED(udev->data_off));
+	WARN_ON(udev->data_size % PAGE_SIZE);
+
+	info->version = "1";
+
+	info->mem[0].name = "tcm-user command & data buffer";
+	info->mem[0].addr = (phys_addr_t) udev->mb_addr;
+	info->mem[0].size = TCMU_RING_SIZE;
+	info->mem[0].memtype = UIO_MEM_VIRTUAL;
+
+	info->irqcontrol = tcmu_irqcontrol;
+	info->irq = UIO_IRQ_CUSTOM;
+
+	info->mmap = tcmu_mmap;
+	info->open = tcmu_open;
+	info->release = tcmu_release;
+
+	ret = uio_register_device(tcmu_root_device, info);
+	if (ret)
+		goto err_register;
+
+	/* Other attributes can be configured in userspace */
+	dev->dev_attrib.hw_block_size = 512;
+	dev->dev_attrib.hw_max_sectors = 128;
+	dev->dev_attrib.hw_queue_depth = 128;
+
+	ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
+				 udev->uio_info.uio_dev->minor);
+	if (ret)
+		goto err_netlink;
+
+	return 0;
+
+err_netlink:
+	uio_unregister_device(&udev->uio_info);
+err_register:
+	vfree(udev->mb_addr);
+err_vzalloc:
+	kfree(info->name);
+
+	return ret;
+}
+
+static int tcmu_check_pending_cmd(int id, void *p, void *data)
+{
+	struct tcmu_cmd *cmd = p;
+
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+		return 0;
+	return -EINVAL;
+}
+
+static void tcmu_free_device(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	int i;
+
+	del_timer_sync(&udev->timeout);
+
+	vfree(udev->mb_addr);
+
+	/* Upper layer should drain all requests before calling this */
+	spin_lock_irq(&udev->commands_lock);
+	i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL);
+	idr_destroy(&udev->commands);
+	spin_unlock_irq(&udev->commands_lock);
+	WARN_ON(i);
+
+	/* Device was configured */
+	if (udev->uio_info.uio_dev) {
+		tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
+				   udev->uio_info.uio_dev->minor);
+
+		uio_unregister_device(&udev->uio_info);
+		kfree(udev->uio_info.name);
+		kfree(udev->name);
+	}
+
+	kfree(udev);
+}
+
+enum {
+	Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
+};
+
+static match_table_t tokens = {
+	{Opt_dev_config, "dev_config=%s"},
+	{Opt_dev_size, "dev_size=%u"},
+	{Opt_pass_level, "pass_level=%u"},
+	{Opt_err, NULL}
+};
+
+static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
+		const char *page, ssize_t count)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	char *orig, *ptr, *opts, *arg_p;
+	substring_t args[MAX_OPT_ARGS];
+	int ret = 0, token;
+	int arg;
+
+	opts = kstrdup(page, GFP_KERNEL);
+	if (!opts)
+		return -ENOMEM;
+
+	orig = opts;
+
+	while ((ptr = strsep(&opts, ",\n")) != NULL) {
+		if (!*ptr)
+			continue;
+
+		token = match_token(ptr, tokens, args);
+		switch (token) {
+		case Opt_dev_config:
+			if (match_strlcpy(udev->dev_config, &args[0],
+					  TCMU_CONFIG_LEN) == 0) {
+				ret = -EINVAL;
+				break;
+			}
+			pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config);
+			break;
+		case Opt_dev_size:
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
+			ret = kstrtoul(arg_p, 0, (unsigned long *) &udev->dev_size);
+			kfree(arg_p);
+			if (ret < 0)
+				pr_err("kstrtoul() failed for dev_size=\n");
+			break;
+		case Opt_pass_level:
+			match_int(args, &arg);
+			if (arg >= TCMU_PASS_INVALID) {
+				pr_warn("TCMU: Invalid pass_level: %d\n", arg);
+				break;
+			}
+
+			pr_debug("TCMU: Setting pass_level to %d\n", arg);
+			udev->pass_level = arg;
+			break;
+		default:
+			break;
+		}
+	}
+
+	kfree(orig);
+	return (!ret) ? count : ret;
+}
+
+static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+	ssize_t bl = 0;
+
+	bl = sprintf(b + bl, "Config: %s ",
+		     udev->dev_config[0] ? udev->dev_config : "NULL");
+	bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n",
+		      udev->dev_size, udev->pass_level);
+
+	return bl;
+}
+
+static sector_t tcmu_get_blocks(struct se_device *dev)
+{
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	return div_u64(udev->dev_size - dev->dev_attrib.block_size,
+		       dev->dev_attrib.block_size);
+}
+
+static sense_reason_t
+tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
+		enum dma_data_direction data_direction)
+{
+	int ret;
+
+	ret = tcmu_queue_cmd(se_cmd);
+
+	if (ret != 0)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	else
+		return TCM_NO_SENSE;
+}
+
+static sense_reason_t
+tcmu_pass_op(struct se_cmd *se_cmd)
+{
+	int ret = tcmu_queue_cmd(se_cmd);
+
+	if (ret != 0)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	else
+		return TCM_NO_SENSE;
+}
+
+static struct sbc_ops tcmu_sbc_ops = {
+	.execute_rw = tcmu_execute_rw,
+	.execute_sync_cache	= tcmu_pass_op,
+	.execute_write_same	= tcmu_pass_op,
+	.execute_write_same_unmap = tcmu_pass_op,
+	.execute_unmap		= tcmu_pass_op,
+};
+
+static sense_reason_t
+tcmu_parse_cdb(struct se_cmd *cmd)
+{
+	unsigned char *cdb = cmd->t_task_cdb;
+	struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
+	sense_reason_t ret;
+
+	switch (udev->pass_level) {
+	case TCMU_PASS_ALL:
+		/* We're just like pscsi, then */
+		/*
+		 * For REPORT LUNS we always need to emulate the response, for everything
+		 * else, pass it up.
+		 */
+		switch (cdb[0]) {
+		case REPORT_LUNS:
+			cmd->execute_cmd = spc_emulate_report_luns;
+			break;
+		case READ_6:
+		case READ_10:
+		case READ_12:
+		case READ_16:
+		case WRITE_6:
+		case WRITE_10:
+		case WRITE_12:
+		case WRITE_16:
+		case WRITE_VERIFY:
+			cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+			/* FALLTHROUGH */
+		default:
+			cmd->execute_cmd = tcmu_pass_op;
+		}
+		ret = TCM_NO_SENSE;
+		break;
+	case TCMU_PASS_IO:
+		ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops);
+		break;
+	default:
+		pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
+		ret = TCM_CHECK_CONDITION_ABORT_CMD;
+	}
+
+	return ret;
+}
+
+static struct se_subsystem_api tcmu_template = {
+	.name			= "user",
+	.inquiry_prod		= "USER",
+	.inquiry_rev		= TCMU_VERSION,
+	.owner			= THIS_MODULE,
+	.transport_type		= TRANSPORT_PLUGIN_VHBA_PDEV,
+	.attach_hba		= tcmu_attach_hba,
+	.detach_hba		= tcmu_detach_hba,
+	.alloc_device		= tcmu_alloc_device,
+	.configure_device	= tcmu_configure_device,
+	.free_device		= tcmu_free_device,
+	.parse_cdb		= tcmu_parse_cdb,
+	.set_configfs_dev_params = tcmu_set_configfs_dev_params,
+	.show_configfs_dev_params = tcmu_show_configfs_dev_params,
+	.get_device_type	= sbc_get_device_type,
+	.get_blocks		= tcmu_get_blocks,
+};
+
+static int __init tcmu_module_init(void)
+{
+	int ret;
+
+	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
+
+	tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
+				sizeof(struct tcmu_cmd),
+				__alignof__(struct tcmu_cmd),
+				0, NULL);
+	if (!tcmu_cmd_cache)
+		return -ENOMEM;
+
+	tcmu_root_device = root_device_register("tcm_user");
+	if (IS_ERR(tcmu_root_device)) {
+		ret = PTR_ERR(tcmu_root_device);
+		goto out_free_cache;
+	}
+
+	ret = genl_register_family(&tcmu_genl_family);
+	if (ret < 0) {
+		goto out_unreg_device;
+	}
+
+	ret = transport_subsystem_register(&tcmu_template);
+	if (ret)
+		goto out_unreg_genl;
+
+	return 0;
+
+out_unreg_genl:
+	genl_unregister_family(&tcmu_genl_family);
+out_unreg_device:
+	root_device_unregister(tcmu_root_device);
+out_free_cache:
+	kmem_cache_destroy(tcmu_cmd_cache);
+
+	return ret;
+}
+
+static void __exit tcmu_module_exit(void)
+{
+	transport_subsystem_release(&tcmu_template);
+	genl_unregister_family(&tcmu_genl_family);
+	root_device_unregister(tcmu_root_device);
+	kmem_cache_destroy(tcmu_cmd_cache);
+}
+
+MODULE_DESCRIPTION("TCM USER subsystem plugin");
+MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
+MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
+MODULE_LICENSE("GPL");
+
+module_init(tcmu_module_init);
+module_exit(tcmu_module_exit);
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index be88166349a1..6ebd0d1faf2e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -371,6 +371,7 @@ header-y += swab.h
 header-y += synclink.h
 header-y += sysctl.h
 header-y += sysinfo.h
+header-y += target_core_user.h
 header-y += taskstats.h
 header-y += tcp.h
 header-y += tcp_metrics.h
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 000000000000..7dcfbe6771b1
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
+#ifndef __TARGET_CORE_USER_H
+#define __TARGET_CORE_USER_H
+
+/* This header will be used by application too */
+
+#include <linux/types.h>
+#include <linux/uio.h>
+
+#ifndef __packed
+#define __packed                        __attribute__((packed))
+#endif
+
+#define TCMU_VERSION "1.0"
+
+/*
+ * Ring Design
+ * -----------
+ *
+ * The mmaped area is divided into three parts:
+ * 1) The mailbox (struct tcmu_mailbox, below)
+ * 2) The command ring
+ * 3) Everything beyond the command ring (data)
+ *
+ * The mailbox tells userspace the offset of the command ring from the
+ * start of the shared memory region, and how big the command ring is.
+ *
+ * The kernel passes SCSI commands to userspace by putting a struct
+ * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
+ * userspace via uio's interrupt mechanism.
+ *
+ * tcmu_cmd_entry contains a header. If the header type is PAD,
+ * userspace should skip hdr->length bytes (mod cmdr_size) to find the
+ * next cmd_entry.
+ *
+ * Otherwise, the entry will contain offsets into the mmaped area that
+ * contain the cdb and data buffers -- the latter accessible via the
+ * iov array. iov addresses are also offsets into the shared area.
+ *
+ * When userspace is completed handling the command, set
+ * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
+ * and also set mailbox->cmd_tail equal to the old cmd_tail plus
+ * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it
+ * should process the next packet the same way, and so on.
+ */
+
+#define TCMU_MAILBOX_VERSION 1
+#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
+
+struct tcmu_mailbox {
+	__u16 version;
+	__u16 flags;
+	__u32 cmdr_off;
+	__u32 cmdr_size;
+
+	__u32 cmd_head;
+
+	/* Updated by user. On its own cacheline */
+	__u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
+
+} __packed;
+
+enum tcmu_opcode {
+	TCMU_OP_PAD = 0,
+	TCMU_OP_CMD,
+};
+
+/*
+ * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
+ */
+struct tcmu_cmd_entry_hdr {
+		__u32 len_op;
+} __packed;
+
+#define TCMU_OP_MASK 0x7
+
+static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+{
+	hdr->len_op &= ~TCMU_OP_MASK;
+	hdr->len_op |= (op & TCMU_OP_MASK);
+}
+
+static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+{
+	return hdr->len_op & ~TCMU_OP_MASK;
+}
+
+static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+{
+	hdr->len_op &= TCMU_OP_MASK;
+	hdr->len_op |= len;
+}
+
+/* Currently the same as SCSI_SENSE_BUFFERSIZE */
+#define TCMU_SENSE_BUFFERSIZE 96
+
+struct tcmu_cmd_entry {
+	struct tcmu_cmd_entry_hdr hdr;
+
+	uint16_t cmd_id;
+	uint16_t __pad1;
+
+	union {
+		struct {
+			uint64_t cdb_off;
+			uint64_t iov_cnt;
+			struct iovec iov[0];
+		} req;
+		struct {
+			uint8_t scsi_status;
+			uint8_t __pad1;
+			uint16_t __pad2;
+			uint32_t __pad3;
+			char sense_buffer[TCMU_SENSE_BUFFERSIZE];
+		} rsp;
+	};
+
+} __packed;
+
+#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
+
+enum tcmu_genl_cmd {
+	TCMU_CMD_UNSPEC,
+	TCMU_CMD_ADDED_DEVICE,
+	TCMU_CMD_REMOVED_DEVICE,
+	__TCMU_CMD_MAX,
+};
+#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
+
+enum tcmu_genl_attr {
+	TCMU_ATTR_UNSPEC,
+	TCMU_ATTR_DEVICE,
+	TCMU_ATTR_MINOR,
+	__TCMU_ATTR_MAX,
+};
+#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
+
+#endif
-- 
cgit v1.2.3


From 37dd0247797b168ad1cc7f5dbec825a1ee66535b Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:09 -0700
Subject: gue: Receive side for Generic UDP Encapsulation

This patch adds support receiving for GUE packets in the fou module. The
fou module now supports direct foo-over-udp (no encapsulation header)
and GUE. To support this a type parameter is added to the fou netlink
parameters.

For a GUE socket we define gue_udp_recv, gue_gro_receive, and
gue_gro_complete to handle the specifics of the GUE protocol. Most
of the code to manage and configure sockets is common with the fou.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gue.h        |  23 ++++++
 include/uapi/linux/fou.h |   7 ++
 net/ipv4/fou.c           | 196 ++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 217 insertions(+), 9 deletions(-)
 create mode 100644 include/net/gue.h

(limited to 'include/uapi')

diff --git a/include/net/gue.h b/include/net/gue.h
new file mode 100644
index 000000000000..b6c332788084
--- /dev/null
+++ b/include/net/gue.h
@@ -0,0 +1,23 @@
+#ifndef __NET_GUE_H
+#define __NET_GUE_H
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8	hlen:4,
+			version:4;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+			__u8	version:4,
+				hlen:4;
+#else
+#error  "Please fix <asm/byteorder.h>"
+#endif
+			__u8    next_hdr;
+			__u16   flags;
+		};
+		__u32 word;
+	};
+};
+
+#endif
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index e03376de453d..8df06894da23 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -13,6 +13,7 @@ enum {
 	FOU_ATTR_PORT,				/* u16 */
 	FOU_ATTR_AF,				/* u8 */
 	FOU_ATTR_IPPROTO,			/* u8 */
+	FOU_ATTR_TYPE,				/* u8 */
 
 	__FOU_ATTR_MAX,
 };
@@ -27,6 +28,12 @@ enum {
 	__FOU_CMD_MAX,
 };
 
+enum {
+	FOU_ENCAP_UNSPEC,
+	FOU_ENCAP_DIRECT,
+	FOU_ENCAP_GUE,
+};
+
 #define FOU_CMD_MAX	(__FOU_CMD_MAX - 1)
 
 #endif /* _UAPI_LINUX_FOU_H */
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 7e2126a31f2e..efa70ad44906 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <net/genetlink.h>
+#include <net/gue.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/udp.h>
@@ -27,6 +28,7 @@ struct fou {
 };
 
 struct fou_cfg {
+	u16 type;
 	u8 protocol;
 	struct udp_port_cfg udp_config;
 };
@@ -64,6 +66,41 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 					  sizeof(struct udphdr));
 }
 
+static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct fou *fou = fou_from_sock(sk);
+	size_t len;
+	struct guehdr *guehdr;
+	struct udphdr *uh;
+
+	if (!fou)
+		return 1;
+
+	len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	if (!pskb_may_pull(skb, len))
+		goto drop;
+
+	uh = udp_hdr(skb);
+	guehdr = (struct guehdr *)&uh[1];
+
+	len += guehdr->hlen << 2;
+	if (!pskb_may_pull(skb, len))
+		goto drop;
+
+	if (guehdr->version != 0)
+		goto drop;
+
+	if (guehdr->flags) {
+		/* No support yet */
+		goto drop;
+	}
+
+	return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
 static struct sk_buff **fou_gro_receive(struct sk_buff **head,
 					struct sk_buff *skb)
 {
@@ -107,6 +144,112 @@ out_unlock:
 	return err;
 }
 
+static struct sk_buff **gue_gro_receive(struct sk_buff **head,
+					struct sk_buff *skb)
+{
+	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff **pp = NULL;
+	struct sk_buff *p;
+	u8 proto;
+	struct guehdr *guehdr;
+	unsigned int hlen, guehlen;
+	unsigned int off;
+	int flush = 1;
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*guehdr);
+	guehdr = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		guehdr = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!guehdr))
+			goto out;
+	}
+
+	proto = guehdr->next_hdr;
+
+	rcu_read_lock();
+	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+	ops = rcu_dereference(offloads[proto]);
+	if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+		goto out_unlock;
+
+	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+	hlen = off + guehlen;
+	if (skb_gro_header_hard(skb, hlen)) {
+		guehdr = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!guehdr))
+			goto out_unlock;
+	}
+
+	flush = 0;
+
+	for (p = *head; p; p = p->next) {
+		const struct guehdr *guehdr2;
+
+		if (!NAPI_GRO_CB(p)->same_flow)
+			continue;
+
+		guehdr2 = (struct guehdr *)(p->data + off);
+
+		/* Compare base GUE header to be equal (covers
+		 * hlen, version, next_hdr, and flags.
+		 */
+		if (guehdr->word != guehdr2->word) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+
+		/* Compare optional fields are the same. */
+		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
+					   guehdr->hlen << 2)) {
+			NAPI_GRO_CB(p)->same_flow = 0;
+			continue;
+		}
+	}
+
+	skb_gro_pull(skb, guehlen);
+
+	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
+	skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+
+	pp = ops->callbacks.gro_receive(head, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	NAPI_GRO_CB(skb)->flush |= flush;
+
+	return pp;
+}
+
+static int gue_gro_complete(struct sk_buff *skb, int nhoff)
+{
+	const struct net_offload **offloads;
+	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+	const struct net_offload *ops;
+	unsigned int guehlen;
+	u8 proto;
+	int err = -ENOENT;
+
+	proto = guehdr->next_hdr;
+
+	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+
+	rcu_read_lock();
+	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+	ops = rcu_dereference(offloads[proto]);
+	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+		goto out_unlock;
+
+	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);
+
+out_unlock:
+	rcu_read_unlock();
+	return err;
+}
+
 static int fou_add_to_port_list(struct fou *fou)
 {
 	struct fou *fout;
@@ -142,6 +285,28 @@ static void fou_release(struct fou *fou)
 	kfree(fou);
 }
 
+static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+	udp_sk(sk)->encap_rcv = fou_udp_recv;
+	fou->protocol = cfg->protocol;
+	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
+	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
+	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+	fou->udp_offloads.ipproto = cfg->protocol;
+
+	return 0;
+}
+
+static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
+{
+	udp_sk(sk)->encap_rcv = gue_udp_recv;
+	fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
+	fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
+	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
+
+	return 0;
+}
+
 static int fou_create(struct net *net, struct fou_cfg *cfg,
 		      struct socket **sockp)
 {
@@ -164,10 +329,24 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk = sock->sk;
 
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	fou->protocol = cfg->protocol;
-	fou->port =  cfg->udp_config.local_udp_port;
-	udp_sk(sk)->encap_rcv = fou_udp_recv;
+	fou->port = cfg->udp_config.local_udp_port;
+
+	/* Initial for fou type */
+	switch (cfg->type) {
+	case FOU_ENCAP_DIRECT:
+		err = fou_encap_init(sk, fou, cfg);
+		if (err)
+			goto error;
+		break;
+	case FOU_ENCAP_GUE:
+		err = gue_encap_init(sk, fou, cfg);
+		if (err)
+			goto error;
+		break;
+	default:
+		err = -EINVAL;
+		goto error;
+	}
 
 	udp_sk(sk)->encap_type = 1;
 	udp_encap_enable();
@@ -179,11 +358,6 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk->sk_allocation = GFP_ATOMIC;
 
-	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
-	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
-	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
-	fou->udp_offloads.ipproto = cfg->protocol;
-
 	if (cfg->udp_config.family == AF_INET) {
 		err = udp_add_offload(&fou->udp_offloads);
 		if (err)
@@ -240,6 +414,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 	[FOU_ATTR_PORT] = { .type = NLA_U16, },
 	[FOU_ATTR_AF] = { .type = NLA_U8, },
 	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
 };
 
 static int parse_nl_config(struct genl_info *info,
@@ -267,6 +442,9 @@ static int parse_nl_config(struct genl_info *info,
 	if (info->attrs[FOU_ATTR_IPPROTO])
 		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);
 
+	if (info->attrs[FOU_ATTR_TYPE])
+		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From bc1fc390e1728672b5b343b85185fcc1fe41043b Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 3 Oct 2014 15:48:10 -0700
Subject: ip_tunnel: Add GUE support

This patch allows configuring IPIP, sit, and GRE tunnels to use GUE.
This is very similar to fou excpet that we need to insert the GUE header
in addition to the UDP header on transmit.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_tunnel.h |  1 +
 net/ipv4/ip_tunnel.c           | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 7c832afdfa94..280d9e092283 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -64,6 +64,7 @@ enum {
 enum tunnel_encap_types {
 	TUNNEL_ENCAP_NONE,
 	TUNNEL_ENCAP_FOU,
+	TUNNEL_ENCAP_GUE,
 };
 
 #define TUNNEL_ENCAP_FLAG_CSUM		(1<<0)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d9c9dc4ffeaf..0bb8e141eacc 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,6 +56,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/udp.h>
+#include <net/gue.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -495,6 +496,8 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
 		return 0;
 	case TUNNEL_ENCAP_FOU:
 		return sizeof(struct udphdr);
+	case TUNNEL_ENCAP_GUE:
+		return sizeof(struct udphdr) + sizeof(struct guehdr);
 	default:
 		return -EINVAL;
 	}
@@ -546,6 +549,15 @@ static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 	skb_reset_transport_header(skb);
 	uh = udp_hdr(skb);
 
+	if (e->type == TUNNEL_ENCAP_GUE) {
+		struct guehdr *guehdr = (struct guehdr *)&uh[1];
+
+		guehdr->version = 0;
+		guehdr->hlen = 0;
+		guehdr->flags = 0;
+		guehdr->next_hdr = *protocol;
+	}
+
 	uh->dest = e->dport;
 	uh->source = sport;
 	uh->len = htons(skb->len);
@@ -565,6 +577,7 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 	case TUNNEL_ENCAP_NONE:
 		return 0;
 	case TUNNEL_ENCAP_FOU:
+	case TUNNEL_ENCAP_GUE:
 		return fou_build_header(skb, &t->encap, t->encap_hlen,
 					protocol, fl4);
 	default:
-- 
cgit v1.2.3


From 86f1152b117a404229fd6f08ec3faca779f37b92 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Wed, 13 Aug 2014 13:53:43 -0500
Subject: dm: allow active and inactive tables to share dm_devs

Until this change, when loading a new DM table, DM core would re-open
all of the devices in the DM table.  Now, DM core will avoid redundant
device opens (and closes when destroying the old table) if the old
table already has a device open using the same mode.  This is achieved
by managing reference counts on the table_devices that DM core now
stores in the mapped_device structure (rather than in the dm_table
structure).  So a mapped_device's active and inactive dm_tables' dm_dev
lists now just point to the dm_devs stored in the mapped_device's
table_devices list.

This improvement in DM core's device reference counting has the
side-effect of fixing a long-standing limitation of the multipath
target: a DM multipath table couldn't include any paths that were unusable
(failed).  For example: if all paths have failed and you add a new,
working, path to the table; you can't use it since the table load would
fail due to it still containing failed paths.  Now a re-load of a
multipath table can include failed devices and when those devices become
active again they can be used instantly.

The device list code in dm.c isn't a straight copy/paste from the code in
dm-table.c, but it's very close (aside from some variable renames).  One
subtle difference is that find_table_device for the tables_devices list
will only match devices with the same name and mode.  This is because we
don't want to upgrade a device's mode in the active table when an
inactive table is loaded.

Access to the mapped_device structure's tables_devices list requires a
mutex (tables_devices_lock), so that tables cannot be created and
destroyed concurrently.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-ioctl.c         |   2 +-
 drivers/md/dm-table.c         | 102 ++++++++++++----------------------
 drivers/md/dm.c               | 126 ++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm.h               |   5 +-
 include/uapi/linux/dm-ioctl.h |   4 +-
 5 files changed, 167 insertions(+), 72 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 51521429fb59..0be9381365d7 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1418,7 +1418,7 @@ static void retrieve_deps(struct dm_table *table,
 	deps->count = count;
 	count = 0;
 	list_for_each_entry (dd, dm_table_get_devices(table), list)
-		deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev);
+		deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev);
 
 	param->data_size = param->data_start + needed;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f9c6cb8dbcf8..b2bd1ebf4562 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -210,15 +210,16 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 	return 0;
 }
 
-static void free_devices(struct list_head *devices)
+static void free_devices(struct list_head *devices, struct mapped_device *md)
 {
 	struct list_head *tmp, *next;
 
 	list_for_each_safe(tmp, next, devices) {
 		struct dm_dev_internal *dd =
 		    list_entry(tmp, struct dm_dev_internal, list);
-		DMWARN("dm_table_destroy: dm_put_device call missing for %s",
-		       dd->dm_dev.name);
+		DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
+		       dm_device_name(md), dd->dm_dev->name);
+		dm_put_table_device(md, dd->dm_dev);
 		kfree(dd);
 	}
 }
@@ -247,7 +248,7 @@ void dm_table_destroy(struct dm_table *t)
 	vfree(t->highs);
 
 	/* free the device list */
-	free_devices(&t->devices);
+	free_devices(&t->devices, t->md);
 
 	dm_free_md_mempools(t->mempools);
 
@@ -262,52 +263,12 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
 	struct dm_dev_internal *dd;
 
 	list_for_each_entry (dd, l, list)
-		if (dd->dm_dev.bdev->bd_dev == dev)
+		if (dd->dm_dev->bdev->bd_dev == dev)
 			return dd;
 
 	return NULL;
 }
 
-/*
- * Open a device so we can use it as a map destination.
- */
-static int open_dev(struct dm_dev_internal *d, dev_t dev,
-		    struct mapped_device *md)
-{
-	static char *_claim_ptr = "I belong to device-mapper";
-	struct block_device *bdev;
-
-	int r;
-
-	BUG_ON(d->dm_dev.bdev);
-
-	bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
-	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
-
-	r = bd_link_disk_holder(bdev, dm_disk(md));
-	if (r) {
-		blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
-		return r;
-	}
-
-	d->dm_dev.bdev = bdev;
-	return 0;
-}
-
-/*
- * Close a device that we've been using.
- */
-static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
-{
-	if (!d->dm_dev.bdev)
-		return;
-
-	bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md));
-	blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
-	d->dm_dev.bdev = NULL;
-}
-
 /*
  * If possible, this checks an area of a destination device is invalid.
  */
@@ -386,19 +347,17 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 			struct mapped_device *md)
 {
 	int r;
-	struct dm_dev_internal dd_new, dd_old;
+	struct dm_dev *old_dev, *new_dev;
 
-	dd_new = dd_old = *dd;
+	old_dev = dd->dm_dev;
 
-	dd_new.dm_dev.mode |= new_mode;
-	dd_new.dm_dev.bdev = NULL;
-
-	r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
+	r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
+				dd->dm_dev->mode | new_mode, &new_dev);
 	if (r)
 		return r;
 
-	dd->dm_dev.mode |= new_mode;
-	close_dev(&dd_old, md);
+	dd->dm_dev = new_dev;
+	dm_put_table_device(md, old_dev);
 
 	return 0;
 }
@@ -440,27 +399,22 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 		if (!dd)
 			return -ENOMEM;
 
-		dd->dm_dev.mode = mode;
-		dd->dm_dev.bdev = NULL;
-
-		if ((r = open_dev(dd, dev, t->md))) {
+		if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
 			kfree(dd);
 			return r;
 		}
 
-		format_dev_t(dd->dm_dev.name, dev);
-
 		atomic_set(&dd->count, 0);
 		list_add(&dd->list, &t->devices);
 
-	} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
+	} else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
 		r = upgrade_mode(dd, mode, t->md);
 		if (r)
 			return r;
 	}
 	atomic_inc(&dd->count);
 
-	*result = &dd->dm_dev;
+	*result = dd->dm_dev;
 	return 0;
 }
 EXPORT_SYMBOL(dm_get_device);
@@ -505,11 +459,23 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
  */
 void dm_put_device(struct dm_target *ti, struct dm_dev *d)
 {
-	struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal,
-						  dm_dev);
+	int found = 0;
+	struct list_head *devices = &ti->table->devices;
+	struct dm_dev_internal *dd;
 
+	list_for_each_entry(dd, devices, list) {
+		if (dd->dm_dev == d) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		DMWARN("%s: device %s not in table devices list",
+		       dm_device_name(ti->table->md), d->name);
+		return;
+	}
 	if (atomic_dec_and_test(&dd->count)) {
-		close_dev(dd, ti->table->md);
+		dm_put_table_device(ti->table->md, d);
 		list_del(&dd->list);
 		kfree(dd);
 	}
@@ -906,7 +872,7 @@ static int dm_table_set_type(struct dm_table *t)
 	/* Non-request-stackable devices can't be used for request-based dm */
 	devices = dm_table_get_devices(t);
 	list_for_each_entry(dd, devices, list) {
-		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
+		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
 			DMWARN("table load rejected: including"
 			       " non-request-stackable devices");
 			return -EINVAL;
@@ -1043,7 +1009,7 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t,
 	struct gendisk *prev_disk = NULL, *template_disk = NULL;
 
 	list_for_each_entry(dd, devices, list) {
-		template_disk = dd->dm_dev.bdev->bd_disk;
+		template_disk = dd->dm_dev->bdev->bd_disk;
 		if (!blk_get_integrity(template_disk))
 			goto no_integrity;
 		if (!match_all && !blk_integrity_is_initialized(template_disk))
@@ -1629,7 +1595,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 	int r = 0;
 
 	list_for_each_entry(dd, devices, list) {
-		struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
+		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
 		char b[BDEVNAME_SIZE];
 
 		if (likely(q))
@@ -1637,7 +1603,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 		else
 			DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
 				     dm_device_name(t->md),
-				     bdevname(dd->dm_dev.bdev, b));
+				     bdevname(dd->dm_dev->bdev, b));
 	}
 
 	list_for_each_entry(cb, &t->target_callbacks, list)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 56a2c74c9a3f..58f3927fd7cc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -142,6 +142,9 @@ struct mapped_device {
 	 */
 	struct dm_table *map;
 
+	struct list_head table_devices;
+	struct mutex table_devices_lock;
+
 	unsigned long flags;
 
 	struct request_queue *queue;
@@ -212,6 +215,12 @@ struct dm_md_mempools {
 	struct bio_set *bs;
 };
 
+struct table_device {
+	struct list_head list;
+	atomic_t count;
+	struct dm_dev dm_dev;
+};
+
 #define RESERVED_BIO_BASED_IOS		16
 #define RESERVED_REQUEST_BASED_IOS	256
 #define RESERVED_MAX_IOS		1024
@@ -669,6 +678,120 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
 	rcu_read_unlock();
 }
 
+/*
+ * Open a table device so we can use it as a map destination.
+ */
+static int open_table_device(struct table_device *td, dev_t dev,
+			     struct mapped_device *md)
+{
+	static char *_claim_ptr = "I belong to device-mapper";
+	struct block_device *bdev;
+
+	int r;
+
+	BUG_ON(td->dm_dev.bdev);
+
+	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	r = bd_link_disk_holder(bdev, dm_disk(md));
+	if (r) {
+		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
+		return r;
+	}
+
+	td->dm_dev.bdev = bdev;
+	return 0;
+}
+
+/*
+ * Close a table device that we've been using.
+ */
+static void close_table_device(struct table_device *td, struct mapped_device *md)
+{
+	if (!td->dm_dev.bdev)
+		return;
+
+	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
+	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+	td->dm_dev.bdev = NULL;
+}
+
+static struct table_device *find_table_device(struct list_head *l, dev_t dev,
+					      fmode_t mode) {
+	struct table_device *td;
+
+	list_for_each_entry(td, l, list)
+		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
+			return td;
+
+	return NULL;
+}
+
+int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+			struct dm_dev **result) {
+	int r;
+	struct table_device *td;
+
+	mutex_lock(&md->table_devices_lock);
+	td = find_table_device(&md->table_devices, dev, mode);
+	if (!td) {
+		td = kmalloc(sizeof(*td), GFP_KERNEL);
+		if (!td) {
+			mutex_unlock(&md->table_devices_lock);
+			return -ENOMEM;
+		}
+
+		td->dm_dev.mode = mode;
+		td->dm_dev.bdev = NULL;
+
+		if ((r = open_table_device(td, dev, md))) {
+			mutex_unlock(&md->table_devices_lock);
+			kfree(td);
+			return r;
+		}
+
+		format_dev_t(td->dm_dev.name, dev);
+
+		atomic_set(&td->count, 0);
+		list_add(&td->list, &md->table_devices);
+	}
+	atomic_inc(&td->count);
+	mutex_unlock(&md->table_devices_lock);
+
+	*result = &td->dm_dev;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_get_table_device);
+
+void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
+{
+	struct table_device *td = container_of(d, struct table_device, dm_dev);
+
+	mutex_lock(&md->table_devices_lock);
+	if (atomic_dec_and_test(&td->count)) {
+		close_table_device(td, md);
+		list_del(&td->list);
+		kfree(td);
+	}
+	mutex_unlock(&md->table_devices_lock);
+}
+EXPORT_SYMBOL(dm_put_table_device);
+
+static void free_table_devices(struct list_head *devices)
+{
+	struct list_head *tmp, *next;
+
+	list_for_each_safe(tmp, next, devices) {
+		struct table_device *td = list_entry(tmp, struct table_device, list);
+
+		DMWARN("dm_destroy: %s still exists with %d references",
+		       td->dm_dev.name, atomic_read(&td->count));
+		kfree(td);
+	}
+}
+
 /*
  * Get the geometry associated with a dm device
  */
@@ -1944,12 +2067,14 @@ static struct mapped_device *alloc_dev(int minor)
 	md->type = DM_TYPE_NONE;
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
+	mutex_init(&md->table_devices_lock);
 	spin_lock_init(&md->deferred_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
 	atomic_set(&md->uevent_seq, 0);
 	INIT_LIST_HEAD(&md->uevent_list);
+	INIT_LIST_HEAD(&md->table_devices);
 	spin_lock_init(&md->uevent_lock);
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
@@ -2035,6 +2160,7 @@ static void free_dev(struct mapped_device *md)
 	blk_integrity_unregister(md->disk);
 	del_gendisk(md->disk);
 	cleanup_srcu_struct(&md->io_barrier);
+	free_table_devices(&md->table_devices);
 	free_minor(minor);
 
 	spin_lock(&_minor_lock);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index e81d2152fa68..988c7fb7b145 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -44,7 +44,7 @@
 struct dm_dev_internal {
 	struct list_head list;
 	atomic_t count;
-	struct dm_dev dm_dev;
+	struct dm_dev *dm_dev;
 };
 
 struct dm_table;
@@ -188,6 +188,9 @@ int dm_cancel_deferred_remove(struct mapped_device *md);
 int dm_request_based(struct mapped_device *md);
 sector_t dm_get_size(struct mapped_device *md);
 struct request_queue *dm_get_md_queue(struct mapped_device *md);
+int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+			struct dm_dev **result);
+void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
 struct dm_stats *dm_get_stats(struct mapped_device *md);
 
 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index c8a4302093a3..3315ab21f728 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	27
+#define DM_VERSION_MINOR	28
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2013-10-30)"
+#define DM_VERSION_EXTRA	"-ioctl (2014-09-17)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 67fa034194bf82a3d5ca841759d921297daa63ca Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:30 -0700
Subject: openvswitch: Add support for matching on OAM packets.

Some tunnel formats have mechanisms for indicating that packets are
OAM frames that should be handled specially (either as high priority or
not forwarded beyond an endpoint). This provides support for allowing
those types of packets to be matched.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h |  1 +
 net/openvswitch/datapath.c       |  1 +
 net/openvswitch/flow_netlink.c   | 17 ++++++++++++-----
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index f7fc507d82ab..7c06106f5af5 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -309,6 +309,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_TTL,                /* u8 Tunnel IP TTL. */
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
+	OVS_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9e3a2fae6a8f..f6bd93d5f435 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -369,6 +369,7 @@ static size_t key_attr_size(void)
 		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_OAM */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f4c8daa73965..22c855fa0bc2 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -346,6 +346,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+			[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
 		};
 
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -390,6 +391,9 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_CSUM:
 			tun_flags |= TUNNEL_CSUM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_OAM:
+			tun_flags |= TUNNEL_OAM;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -431,21 +435,24 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 		return -EMSGSIZE;
 	if (output->ipv4_src &&
-		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
 		return -EMSGSIZE;
 	if (output->ipv4_dst &&
-		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
 		return -EMSGSIZE;
 	if (output->ipv4_tos &&
-		nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
 		return -EMSGSIZE;
 	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
 		return -EMSGSIZE;
 	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
 		return -EMSGSIZE;
 	if ((output->tun_flags & TUNNEL_CSUM) &&
-		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_OAM) &&
+	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
 
 	nla_nest_end(skb, nla);
-- 
cgit v1.2.3


From f0b128c1e2cc33ad104daf0f51a51e34f7763c5f Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:31 -0700
Subject: openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure.

Currently, the flow information that is matched for tunnels and
the tunnel data passed around with packets is the same. However,
as additional information is added this is not necessarily desirable,
as in the case of pointers.

This adds a new structure for tunnel metadata which currently contains
only the existing struct. This change is purely internal to the kernel
since the current OVS_KEY_ATTR_IPV4_TUNNEL is simply a compressed version
of OVS_KEY_ATTR_TUNNEL that is translated at flow setup.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h |  2 +-
 net/openvswitch/actions.c        |  5 +++--
 net/openvswitch/datapath.h       |  2 +-
 net/openvswitch/flow.c           |  6 +++---
 net/openvswitch/flow.h           | 30 +++++++++++++++++-------------
 net/openvswitch/flow_netlink.c   | 38 +++++++++++++++++++++++++++++++-------
 net/openvswitch/vport-gre.c      | 16 +++++++++-------
 net/openvswitch/vport-vxlan.c    | 10 +++++-----
 net/openvswitch/vport.c          |  6 +++---
 net/openvswitch/vport.h          |  2 +-
 10 files changed, 74 insertions(+), 43 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 7c06106f5af5..6753032832e2 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -294,7 +294,7 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
 
 #ifdef __KERNEL__
-	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
+	OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ovs_tunnel_info */
 #endif
 	__OVS_KEY_ATTR_MAX
 };
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6932a42e41a2..006886dbee36 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -590,8 +590,8 @@ static int execute_set_action(struct sk_buff *skb,
 		skb->mark = nla_get_u32(nested_attr);
 		break;
 
-	case OVS_KEY_ATTR_IPV4_TUNNEL:
-		OVS_CB(skb)->egress_tun_key = nla_data(nested_attr);
+	case OVS_KEY_ATTR_TUNNEL_INFO:
+		OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
@@ -778,6 +778,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 
 	this_cpu_inc(exec_actions_level);
+	OVS_CB(skb)->egress_tun_info = NULL;
 	err = do_execute_actions(dp, skb, key,
 				 acts->actions, acts->actions_len);
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index ac3f3df96961..974135439c5c 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -102,8 +102,8 @@ struct datapath {
  */
 struct ovs_skb_cb {
 	struct sw_flow		*flow;
+	struct ovs_tunnel_info  *egress_tun_info;
 	struct vport		*input_vport;
-	struct ovs_key_ipv4_tunnel  *egress_tun_key;
 };
 #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 913bdc1a83c0..2924cb340868 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -642,12 +642,12 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 	return key_extract(skb, key);
 }
 
-int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
+int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
 	/* Extract metadata from packet. */
-	if (tun_key)
-		memcpy(&key->tun_key, tun_key, sizeof(key->tun_key));
+	if (tun_info)
+		memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
 	else
 		memset(&key->tun_key, 0, sizeof(key->tun_key));
 
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 0f5db4ec565d..fe5a71b81c1f 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -49,20 +49,24 @@ struct ovs_key_ipv4_tunnel {
 	u8   ipv4_ttl;
 } __packed __aligned(4); /* Minimize padding. */
 
-static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
-					 const struct iphdr *iph, __be64 tun_id,
-					 __be16 tun_flags)
+struct ovs_tunnel_info {
+	struct ovs_key_ipv4_tunnel tunnel;
+};
+
+static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
+					  const struct iphdr *iph,
+					  __be64 tun_id, __be16 tun_flags)
 {
-	tun_key->tun_id = tun_id;
-	tun_key->ipv4_src = iph->saddr;
-	tun_key->ipv4_dst = iph->daddr;
-	tun_key->ipv4_tos = iph->tos;
-	tun_key->ipv4_ttl = iph->ttl;
-	tun_key->tun_flags = tun_flags;
+	tun_info->tunnel.tun_id = tun_id;
+	tun_info->tunnel.ipv4_src = iph->saddr;
+	tun_info->tunnel.ipv4_dst = iph->daddr;
+	tun_info->tunnel.ipv4_tos = iph->tos;
+	tun_info->tunnel.ipv4_ttl = iph->ttl;
+	tun_info->tunnel.tun_flags = tun_flags;
 
 	/* clear struct padding. */
-	memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
-	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
+	memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
+	       sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
 }
 
 struct sw_flow_key {
@@ -190,8 +194,8 @@ void ovs_flow_stats_clear(struct sw_flow *);
 u64 ovs_flow_used_time(unsigned long flow_jiffies);
 
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_extract(struct ovs_key_ipv4_tunnel *tun_key,
-			 struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb,
+			 struct sw_flow_key *key);
 /* Extract key from packet coming from userspace. */
 int ovs_flow_key_extract_userspace(const struct nlattr *attr,
 				   struct sk_buff *skb,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 22c855fa0bc2..5d6194d9dadc 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1148,13 +1148,14 @@ out:
 	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
 }
 
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+static struct nlattr *__add_action(struct sw_flow_actions **sfa,
+				   int attrtype, void *data, int len)
 {
 	struct nlattr *a;
 
 	a = reserve_sfa_size(sfa, nla_attr_size(len));
 	if (IS_ERR(a))
-		return PTR_ERR(a);
+		return a;
 
 	a->nla_type = attrtype;
 	a->nla_len = nla_attr_size(len);
@@ -1163,6 +1164,18 @@ static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, in
 		memcpy(nla_data(a), data, len);
 	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
 
+	return a;
+}
+
+static int add_action(struct sw_flow_actions **sfa, int attrtype,
+		      void *data, int len)
+{
+	struct nlattr *a;
+
+	a = __add_action(sfa, attrtype, data, len);
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
 	return 0;
 }
 
@@ -1268,6 +1281,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 {
 	struct sw_flow_match match;
 	struct sw_flow_key key;
+	struct ovs_tunnel_info *tun_info;
+	struct nlattr *a;
 	int err, start;
 
 	ovs_match_init(&match, &key, NULL);
@@ -1279,8 +1294,14 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	if (start < 0)
 		return start;
 
-	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
-			sizeof(match.key->tun_key));
+	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
+			 sizeof(*tun_info));
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	tun_info = nla_data(a);
+	tun_info->tunnel = key.tun_key;
+
 	add_nested_action_end(*sfa, start);
 
 	return err;
@@ -1563,17 +1584,20 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 	int err;
 
 	switch (key_type) {
-	case OVS_KEY_ATTR_IPV4_TUNNEL:
+	case OVS_KEY_ATTR_TUNNEL_INFO: {
+		struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
+
 		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
 		if (!start)
 			return -EMSGSIZE;
 
-		err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
-					     nla_data(ovs_key));
+		err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
+					 nla_data(ovs_key));
 		if (err)
 			return err;
 		nla_nest_end(skb, start);
 		break;
+	}
 	default:
 		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
 			return -EMSGSIZE;
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 309cca6e816f..fe768bd600eb 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -63,8 +63,10 @@ static __be16 filter_tnl_flags(__be16 flags)
 static struct sk_buff *__build_header(struct sk_buff *skb,
 				      int tunnel_hlen)
 {
-	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->egress_tun_key;
 	struct tnl_ptk_info tpi;
+	const struct ovs_key_ipv4_tunnel *tun_key;
+
+	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 
 	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
 	if (IS_ERR(skb))
@@ -92,7 +94,7 @@ static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
 static int gre_rcv(struct sk_buff *skb,
 		   const struct tnl_ptk_info *tpi)
 {
-	struct ovs_key_ipv4_tunnel tun_key;
+	struct ovs_tunnel_info tun_info;
 	struct ovs_net *ovs_net;
 	struct vport *vport;
 	__be64 key;
@@ -103,10 +105,10 @@ static int gre_rcv(struct sk_buff *skb,
 		return PACKET_REJECT;
 
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
-	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
-			      filter_tnl_flags(tpi->flags));
+	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
+			       filter_tnl_flags(tpi->flags));
 
-	ovs_vport_receive(vport, skb, &tun_key);
+	ovs_vport_receive(vport, skb, &tun_info);
 	return PACKET_RCVD;
 }
 
@@ -137,12 +139,12 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	__be16 df;
 	int err;
 
-	if (unlikely(!OVS_CB(skb)->egress_tun_key)) {
+	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
 		err = -EINVAL;
 		goto error;
 	}
 
-	tun_key = OVS_CB(skb)->egress_tun_key;
+	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 	/* Route lookup */
 	memset(&fl, 0, sizeof(fl));
 	fl.daddr = tun_key->ipv4_dst;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index f19539bb8adc..5fbff2c1ee49 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -58,7 +58,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 /* Called with rcu_read_lock and BH disabled. */
 static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 {
-	struct ovs_key_ipv4_tunnel tun_key;
+	struct ovs_tunnel_info tun_info;
 	struct vport *vport = vs->data;
 	struct iphdr *iph;
 	__be64 key;
@@ -66,9 +66,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	key = cpu_to_be64(ntohl(vx_vni) >> 8);
-	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+	ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
 
-	ovs_vport_receive(vport, skb, &tun_key);
+	ovs_vport_receive(vport, skb, &tun_info);
 }
 
 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
@@ -147,12 +147,12 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	__be16 df;
 	int err;
 
-	if (unlikely(!OVS_CB(skb)->egress_tun_key)) {
+	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
 		err = -EINVAL;
 		goto error;
 	}
 
-	tun_key = OVS_CB(skb)->egress_tun_key;
+	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
 	/* Route lookup */
 	memset(&fl, 0, sizeof(fl));
 	fl.daddr = tun_key->ipv4_dst;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 5df8377fcfb1..3e50ee8a218c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -432,7 +432,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
  * skb->data should point to the Ethernet header.
  */
 void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
-		       struct ovs_key_ipv4_tunnel *tun_key)
+		       struct ovs_tunnel_info *tun_info)
 {
 	struct pcpu_sw_netstats *stats;
 	struct sw_flow_key key;
@@ -445,9 +445,9 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
 	u64_stats_update_end(&stats->syncp);
 
 	OVS_CB(skb)->input_vport = vport;
-	OVS_CB(skb)->egress_tun_key = NULL;
+	OVS_CB(skb)->egress_tun_info = NULL;
 	/* Extract flow from 'skb' into 'key'. */
-	error = ovs_flow_key_extract(tun_key, skb, &key);
+	error = ovs_flow_key_extract(tun_info, skb, &key);
 	if (unlikely(error)) {
 		kfree_skb(skb);
 		return;
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 0efd62fef1e8..e28964aba021 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -207,7 +207,7 @@ static inline struct vport *vport_from_priv(void *priv)
 }
 
 void ovs_vport_receive(struct vport *, struct sk_buff *,
-		       struct ovs_key_ipv4_tunnel *);
+		       struct ovs_tunnel_info *);
 
 /* List of statically compiled vport implementations.  Don't forget to also
  * add yours to the list at the top of vport.c. */
-- 
cgit v1.2.3


From f5796684069e0c71c65bce6a6d4766114aec1396 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 3 Oct 2014 15:35:33 -0700
Subject: openvswitch: Add support for Geneve tunneling.

The Openvswitch implementation is completely agnostic to the options
that are in use and can handle newly defined options without
further work. It does this by simply matching on a byte array
of options and allowing userspace to setup flows on this array.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Singed-off-by: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h         |  21 ++--
 include/uapi/linux/openvswitch.h |   2 +
 net/openvswitch/Kconfig          |  11 ++
 net/openvswitch/Makefile         |   4 +
 net/openvswitch/datapath.c       |   5 +-
 net/openvswitch/flow.c           |  20 +++-
 net/openvswitch/flow.h           |  20 +++-
 net/openvswitch/flow_netlink.c   | 176 ++++++++++++++++++++++++-----
 net/openvswitch/vport-geneve.c   | 236 +++++++++++++++++++++++++++++++++++++++
 net/openvswitch/vport-gre.c      |   2 +-
 net/openvswitch/vport-vxlan.c    |   2 +-
 net/openvswitch/vport.c          |   3 +
 net/openvswitch/vport.h          |   1 +
 13 files changed, 461 insertions(+), 42 deletions(-)
 create mode 100644 net/openvswitch/vport-geneve.c

(limited to 'include/uapi')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index a9ce1556d773..5bc6edeb7143 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -86,17 +86,18 @@ struct ip_tunnel {
 	struct gro_cells	gro_cells;
 };
 
-#define TUNNEL_CSUM	__cpu_to_be16(0x01)
-#define TUNNEL_ROUTING	__cpu_to_be16(0x02)
-#define TUNNEL_KEY	__cpu_to_be16(0x04)
-#define TUNNEL_SEQ	__cpu_to_be16(0x08)
-#define TUNNEL_STRICT	__cpu_to_be16(0x10)
-#define TUNNEL_REC	__cpu_to_be16(0x20)
-#define TUNNEL_VERSION	__cpu_to_be16(0x40)
-#define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
+#define TUNNEL_CSUM		__cpu_to_be16(0x01)
+#define TUNNEL_ROUTING		__cpu_to_be16(0x02)
+#define TUNNEL_KEY		__cpu_to_be16(0x04)
+#define TUNNEL_SEQ		__cpu_to_be16(0x08)
+#define TUNNEL_STRICT		__cpu_to_be16(0x10)
+#define TUNNEL_REC		__cpu_to_be16(0x20)
+#define TUNNEL_VERSION		__cpu_to_be16(0x40)
+#define TUNNEL_NO_KEY		__cpu_to_be16(0x80)
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
-#define TUNNEL_OAM	__cpu_to_be16(0x0200)
-#define TUNNEL_CRIT_OPT	__cpu_to_be16(0x0400)
+#define TUNNEL_OAM		__cpu_to_be16(0x0200)
+#define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
+#define TUNNEL_OPTIONS_PRESENT	__cpu_to_be16(0x0800)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 6753032832e2..435eabc5ffaa 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -192,6 +192,7 @@ enum ovs_vport_type {
 	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
 	OVS_VPORT_TYPE_GRE,      /* GRE tunnel. */
 	OVS_VPORT_TYPE_VXLAN,	 /* VXLAN tunnel. */
+	OVS_VPORT_TYPE_GENEVE,	 /* Geneve tunnel. */
 	__OVS_VPORT_TYPE_MAX
 };
 
@@ -310,6 +311,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
 	OVS_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
 	OVS_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
+	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 6ecf491ad509..ba3bb8203b99 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -54,3 +54,14 @@ config OPENVSWITCH_VXLAN
 	  Say N to exclude this support and reduce the binary size.
 
 	  If unsure, say Y.
+
+config OPENVSWITCH_GENEVE
+	bool "Open vSwitch Geneve tunneling support"
+	depends on INET
+	depends on OPENVSWITCH
+	depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
+	default y
+	---help---
+	  If you say Y here, then the Open vSwitch will be able create geneve vport.
+
+	  Say N to exclude this support and reduce the binary size.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5dae91..9a33a273c375 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,10 @@ openvswitch-y := \
 	vport-internal_dev.o \
 	vport-netdev.o
 
+ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
+openvswitch-y += vport-geneve.o
+endif
+
 ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
 openvswitch-y += vport-vxlan.o
 endif
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 010125c48244..2e31d9e7f4dc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -370,6 +370,7 @@ static size_t key_attr_size(void)
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_OAM */
+		  + nla_total_size(256)   /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
@@ -556,10 +557,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
 				   &flow->key, 0, &acts);
-	rcu_assign_pointer(flow->sf_acts, acts);
 	if (err)
 		goto err_flow_free;
 
+	rcu_assign_pointer(flow->sf_acts, acts);
+
+	OVS_CB(packet)->egress_tun_info = NULL;
 	OVS_CB(packet)->flow = flow;
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2924cb340868..62db02ba36bc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -448,6 +448,9 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 	int error;
 	struct ethhdr *eth;
 
+	/* Flags are always used as part of stats */
+	key->tp.flags = 0;
+
 	skb_reset_mac_header(skb);
 
 	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
@@ -646,10 +649,23 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
 	/* Extract metadata from packet. */
-	if (tun_info)
+	if (tun_info) {
 		memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
-	else
+
+		if (tun_info->options) {
+			BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
+						   8)) - 1
+					> sizeof(key->tun_opts));
+			memcpy(GENEVE_OPTS(key, tun_info->options_len),
+			       tun_info->options, tun_info->options_len);
+			key->tun_opts_len = tun_info->options_len;
+		} else {
+			key->tun_opts_len = 0;
+		}
+	} else  {
+		key->tun_opts_len = 0;
 		memset(&key->tun_key, 0, sizeof(key->tun_key));
+	}
 
 	key->phy.priority = skb->priority;
 	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fe5a71b81c1f..71813318c8c7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -51,11 +51,24 @@ struct ovs_key_ipv4_tunnel {
 
 struct ovs_tunnel_info {
 	struct ovs_key_ipv4_tunnel tunnel;
+	struct geneve_opt *options;
+	u8 options_len;
 };
 
+/* Store options at the end of the array if they are less than the
+ * maximum size. This allows us to get the benefits of variable length
+ * matching for small options.
+ */
+#define GENEVE_OPTS(flow_key, opt_len)	\
+	((struct geneve_opt *)((flow_key)->tun_opts + \
+			       FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
+			       opt_len))
+
 static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 					  const struct iphdr *iph,
-					  __be64 tun_id, __be16 tun_flags)
+					  __be64 tun_id, __be16 tun_flags,
+					  struct geneve_opt *opts,
+					  u8 opts_len)
 {
 	tun_info->tunnel.tun_id = tun_id;
 	tun_info->tunnel.ipv4_src = iph->saddr;
@@ -67,9 +80,14 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
 	/* clear struct padding. */
 	memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0,
 	       sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
+
+	tun_info->options = opts;
+	tun_info->options_len = opts_len;
 }
 
 struct sw_flow_key {
+	u8 tun_opts[255];
+	u8 tun_opts_len;
 	struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
 	struct {
 		u32	priority;	/* Packet QoS priority. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5d6194d9dadc..368f23307911 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -42,6 +42,7 @@
 #include <linux/icmp.h>
 #include <linux/icmpv6.h>
 #include <linux/rculist.h>
+#include <net/geneve.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ndisc.h>
@@ -88,18 +89,20 @@ static void update_range__(struct sw_flow_match *match,
 		}                                                           \
 	} while (0)
 
-#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
-	do { \
-		update_range__(match, offsetof(struct sw_flow_key, field),  \
-				len, is_mask);                              \
-		if (is_mask) {						    \
-			if ((match)->mask)				    \
-				memcpy(&(match)->mask->key.field, value_p, len);\
-		} else {                                                    \
-			memcpy(&(match)->key->field, value_p, len);         \
-		}                                                           \
+#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)	    \
+	do {								    \
+		update_range__(match, offset, len, is_mask);		    \
+		if (is_mask)						    \
+			memcpy((u8 *)&(match)->mask->key + offset, value_p, \
+			       len);					    \
+		else							    \
+			memcpy((u8 *)(match)->key + offset, value_p, len);  \
 	} while (0)
 
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)		      \
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
+				  value_p, len, is_mask)
+
 static u16 range_n_bytes(const struct sw_flow_key_range *range)
 {
 	return range->end - range->start;
@@ -335,6 +338,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 	int rem;
 	bool ttl = false;
 	__be16 tun_flags = 0;
+	unsigned long opt_key_offset;
 
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
@@ -347,6 +351,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
 			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
 			[OVS_TUNNEL_KEY_ATTR_OAM] = 0,
+			[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
 		};
 
 		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
@@ -355,7 +360,8 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			return -EINVAL;
 		}
 
-		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+		if (ovs_tunnel_key_lens[type] != nla_len(a) &&
+		    ovs_tunnel_key_lens[type] != -1) {
 			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
 				  " length (type=%d, length=%d, expected=%d).\n",
 				  type, nla_len(a), ovs_tunnel_key_lens[type]);
@@ -394,7 +400,60 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		case OVS_TUNNEL_KEY_ATTR_OAM:
 			tun_flags |= TUNNEL_OAM;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			tun_flags |= TUNNEL_OPTIONS_PRESENT;
+			if (nla_len(a) > sizeof(match->key->tun_opts)) {
+				OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n",
+					  nla_len(a),
+					  sizeof(match->key->tun_opts));
+				return -EINVAL;
+			}
+
+			if (nla_len(a) % 4 != 0) {
+				OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
+					  nla_len(a));
+				return -EINVAL;
+			}
+
+			/* We need to record the length of the options passed
+			 * down, otherwise packets with the same format but
+			 * additional options will be silently matched.
+			 */
+			if (!is_mask) {
+				SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
+						false);
+			} else {
+				/* This is somewhat unusual because it looks at
+				 * both the key and mask while parsing the
+				 * attributes (and by extension assumes the key
+				 * is parsed first). Normally, we would verify
+				 * that each is the correct length and that the
+				 * attributes line up in the validate function.
+				 * However, that is difficult because this is
+				 * variable length and we won't have the
+				 * information later.
+				 */
+				if (match->key->tun_opts_len != nla_len(a)) {
+					OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
+						  match->key->tun_opts_len,
+						  nla_len(a));
+					return -EINVAL;
+				}
+
+				SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
+						true);
+			}
+
+			opt_key_offset = (unsigned long)GENEVE_OPTS(
+					  (struct sw_flow_key *)0,
+					  nla_len(a));
+			SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
+						  nla_data(a), nla_len(a),
+						  is_mask);
+			break;
 		default:
+			OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n",
+				  type);
 			return -EINVAL;
 		}
 	}
@@ -421,16 +480,11 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 	return 0;
 }
 
-static int ipv4_tun_to_nlattr(struct sk_buff *skb,
-			      const struct ovs_key_ipv4_tunnel *tun_key,
-			      const struct ovs_key_ipv4_tunnel *output)
+static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
+				const struct ovs_key_ipv4_tunnel *output,
+				const struct geneve_opt *tun_opts,
+				int swkey_tun_opts_len)
 {
-	struct nlattr *nla;
-
-	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
-	if (!nla)
-		return -EMSGSIZE;
-
 	if (output->tun_flags & TUNNEL_KEY &&
 	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
 		return -EMSGSIZE;
@@ -454,12 +508,35 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
+	if (tun_opts &&
+	    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+		    swkey_tun_opts_len, tun_opts))
+		return -EMSGSIZE;
 
-	nla_nest_end(skb, nla);
 	return 0;
 }
 
 
+static int ipv4_tun_to_nlattr(struct sk_buff *skb,
+			      const struct ovs_key_ipv4_tunnel *output,
+			      const struct geneve_opt *tun_opts,
+			      int swkey_tun_opts_len)
+{
+	struct nlattr *nla;
+	int err;
+
+	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+	if (!nla)
+		return -EMSGSIZE;
+
+	err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
+	if (err)
+		return err;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
 static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 				 const struct nlattr **a, bool is_mask)
 {
@@ -905,9 +982,16 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
 	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
 		goto nla_put_failure;
 
-	if ((swkey->tun_key.ipv4_dst || is_mask) &&
-	    ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
-		goto nla_put_failure;
+	if ((swkey->tun_key.ipv4_dst || is_mask)) {
+		const struct geneve_opt *opts = NULL;
+
+		if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
+			opts = GENEVE_OPTS(output, swkey->tun_opts_len);
+
+		if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
+				       swkey->tun_opts_len))
+			goto nla_put_failure;
+	}
 
 	if (swkey->phy.in_port == DP_MAX_PORTS) {
 		if (is_mask && (output->phy.in_port == 0xffff))
@@ -1290,17 +1374,55 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	if (err)
 		return err;
 
+	if (key.tun_opts_len) {
+		struct geneve_opt *option = GENEVE_OPTS(&key,
+							key.tun_opts_len);
+		int opts_len = key.tun_opts_len;
+		bool crit_opt = false;
+
+		while (opts_len > 0) {
+			int len;
+
+			if (opts_len < sizeof(*option))
+				return -EINVAL;
+
+			len = sizeof(*option) + option->length * 4;
+			if (len > opts_len)
+				return -EINVAL;
+
+			crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
+
+			option = (struct geneve_opt *)((u8 *)option + len);
+			opts_len -= len;
+		};
+
+		key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
+	};
+
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
 	if (start < 0)
 		return start;
 
 	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
-			 sizeof(*tun_info));
+			 sizeof(*tun_info) + key.tun_opts_len);
 	if (IS_ERR(a))
 		return PTR_ERR(a);
 
 	tun_info = nla_data(a);
 	tun_info->tunnel = key.tun_key;
+	tun_info->options_len = key.tun_opts_len;
+
+	if (tun_info->options_len) {
+		/* We need to store the options in the action itself since
+		 * everything else will go away after flow setup. We can append
+		 * it to tun_info and then point there.
+		 */
+		memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len),
+		       key.tun_opts_len);
+		tun_info->options = (struct geneve_opt *)(tun_info + 1);
+	} else {
+		tun_info->options = NULL;
+	}
 
 	add_nested_action_end(*sfa, start);
 
@@ -1592,7 +1714,9 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 			return -EMSGSIZE;
 
 		err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
-					 nla_data(ovs_key));
+					 tun_info->options_len ?
+						tun_info->options : NULL,
+					 tun_info->options_len);
 		if (err)
 			return err;
 		nla_nest_end(skb, start);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
new file mode 100644
index 000000000000..5572d482f285
--- /dev/null
+++ b/net/openvswitch/vport-geneve.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/version.h>
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+
+#include <net/geneve.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+/**
+ * struct geneve_port - Keeps track of open UDP ports
+ * @sock: The socket created for this port number.
+ * @name: vport name.
+ */
+struct geneve_port {
+	struct geneve_sock *gs;
+	char name[IFNAMSIZ];
+};
+
+static LIST_HEAD(geneve_ports);
+
+static inline struct geneve_port *geneve_vport(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
+{
+	return (struct genevehdr *)(udp_hdr(skb) + 1);
+}
+
+/* Convert 64 bit tunnel ID to 24 bit VNI. */
+static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+	vni[0] = (__force __u8)(tun_id >> 16);
+	vni[1] = (__force __u8)(tun_id >> 8);
+	vni[2] = (__force __u8)tun_id;
+#else
+	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+/* Convert 24 bit VNI to 64 bit tunnel ID. */
+static __be64 vni_to_tunnel_id(__u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
+#else
+	return (__force __be64)(((__force u64)vni[0] << 40) |
+				((__force u64)vni[1] << 48) |
+				((__force u64)vni[2] << 56));
+#endif
+}
+
+static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
+{
+	struct vport *vport = gs->rcv_data;
+	struct genevehdr *geneveh = geneve_hdr(skb);
+	int opts_len;
+	struct ovs_tunnel_info tun_info;
+	__be64 key;
+	__be16 flags;
+
+	opts_len = geneveh->opt_len * 4;
+
+	flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+		(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
+		(geneveh->oam ? TUNNEL_OAM : 0) |
+		(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
+
+	key = vni_to_tunnel_id(geneveh->vni);
+
+	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags,
+			       geneveh->options, opts_len);
+
+	ovs_vport_receive(vport, skb, &tun_info);
+}
+
+static int geneve_get_options(const struct vport *vport,
+			      struct sk_buff *skb)
+{
+	struct geneve_port *geneve_port = geneve_vport(vport);
+	__be16 sport;
+
+	sport = ntohs(inet_sk(geneve_port->gs->sock->sk)->inet_sport);
+	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, sport))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static void geneve_tnl_destroy(struct vport *vport)
+{
+	struct geneve_port *geneve_port = geneve_vport(vport);
+
+	geneve_sock_release(geneve_port->gs);
+
+	ovs_vport_deferred_free(vport);
+}
+
+static struct vport *geneve_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct geneve_port *geneve_port;
+	struct geneve_sock *gs;
+	struct vport *vport;
+	struct nlattr *a;
+	int err;
+	u16 dst_port;
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		dst_port = nla_get_u16(a);
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(sizeof(struct geneve_port),
+				&ovs_geneve_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	geneve_port = geneve_vport(vport);
+	strncpy(geneve_port->name, parms->name, IFNAMSIZ);
+
+	gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
+	if (IS_ERR(gs)) {
+		ovs_vport_free(vport);
+		return (void *)gs;
+	}
+	geneve_port->gs = gs;
+
+	return vport;
+error:
+	return ERR_PTR(err);
+}
+
+static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct ovs_key_ipv4_tunnel *tun_key;
+	struct ovs_tunnel_info *tun_info;
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct geneve_port *geneve_port = geneve_vport(vport);
+	__be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
+	__be16 sport;
+	struct rtable *rt;
+	struct flowi4 fl;
+	u8 vni[3];
+	__be16 df;
+	int err;
+
+	tun_info = OVS_CB(skb)->egress_tun_info;
+	if (unlikely(!tun_info)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	tun_key = &tun_info->tunnel;
+
+	/* Route lookup */
+	memset(&fl, 0, sizeof(fl));
+	fl.daddr = tun_key->ipv4_dst;
+	fl.saddr = tun_key->ipv4_src;
+	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
+	fl.flowi4_mark = skb->mark;
+	fl.flowi4_proto = IPPROTO_UDP;
+
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto error;
+	}
+
+	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+	sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+	tunnel_id_to_vni(tun_key->tun_id, vni);
+	skb->ignore_df = 1;
+
+	err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
+			      tun_key->ipv4_dst, tun_key->ipv4_tos,
+			      tun_key->ipv4_ttl, df, sport, dport,
+			      tun_key->tun_flags, vni,
+			      tun_info->options_len, (u8 *)tun_info->options,
+			      false);
+	if (err < 0)
+		ip_rt_put(rt);
+error:
+	return err;
+}
+
+static const char *geneve_get_name(const struct vport *vport)
+{
+	struct geneve_port *geneve_port = geneve_vport(vport);
+
+	return geneve_port->name;
+}
+
+const struct vport_ops ovs_geneve_vport_ops = {
+	.type		= OVS_VPORT_TYPE_GENEVE,
+	.create		= geneve_tnl_create,
+	.destroy	= geneve_tnl_destroy,
+	.get_name	= geneve_get_name,
+	.get_options	= geneve_get_options,
+	.send		= geneve_tnl_send,
+};
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index fe768bd600eb..108b82da2fd9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -106,7 +106,7 @@ static int gre_rcv(struct sk_buff *skb,
 
 	key = key_to_tunnel_id(tpi->key, tpi->seq);
 	ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key,
-			       filter_tnl_flags(tpi->flags));
+			       filter_tnl_flags(tpi->flags), NULL, 0);
 
 	ovs_vport_receive(vport, skb, &tun_info);
 	return PACKET_RCVD;
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 5fbff2c1ee49..2735e01dca73 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -66,7 +66,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	key = cpu_to_be64(ntohl(vx_vni) >> 8);
-	ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY);
+	ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0);
 
 	ovs_vport_receive(vport, skb, &tun_info);
 }
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3e50ee8a218c..53001b020ca7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -48,6 +48,9 @@ static const struct vport_ops *vport_ops_list[] = {
 #ifdef CONFIG_OPENVSWITCH_VXLAN
 	&ovs_vxlan_vport_ops,
 #endif
+#ifdef CONFIG_OPENVSWITCH_GENEVE
+	&ovs_geneve_vport_ops,
+#endif
 };
 
 /* Protected by RCU read lock for reading, ovs_mutex for writing. */
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index e28964aba021..8942125de3a6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -215,6 +215,7 @@ extern const struct vport_ops ovs_netdev_vport_ops;
 extern const struct vport_ops ovs_internal_vport_ops;
 extern const struct vport_ops ovs_gre_vport_ops;
 extern const struct vport_ops ovs_vxlan_vport_ops;
+extern const struct vport_ops ovs_geneve_vport_ops;
 
 static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
-- 
cgit v1.2.3


From 1255a5055449781a92076fc5429952f2b33cf309 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Oct 2014 12:35:21 +0300
Subject: ethtool: Ethtool parameter to dynamically change tx_copybreak

Use new ethtool [sg]et_tunable() to set tx_copybread (inline threshold)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 1 +
 net/core/ethtool.c           | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7a364f2f3d3f..99b43056a6fe 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -212,6 +212,7 @@ struct ethtool_value {
 enum tunable_id {
 	ETHTOOL_ID_UNSPEC,
 	ETHTOOL_RX_COPYBREAK,
+	ETHTOOL_TX_COPYBREAK,
 };
 
 enum tunable_type_id {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 27e61b886520..1600aa24d36b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1625,6 +1625,7 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
 {
 	switch (tuna->id) {
 	case ETHTOOL_RX_COPYBREAK:
+	case ETHTOOL_TX_COPYBREAK:
 		if (tuna->len != sizeof(u32) ||
 		    tuna->type_id != ETHTOOL_TUNABLE_U32)
 			return -EINVAL;
-- 
cgit v1.2.3


From f0d1f04f0a2f662b6b617e24d115fddcf6ef8723 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Oct 2014 19:02:11 +0200
Subject: netfilter: fix wrong arithmetics regarding NFT_REJECT_ICMPX_MAX

NFT_REJECT_ICMPX_MAX should be __NFT_REJECT_ICMPX_MAX - 1.

nft_reject_icmp_code() and nft_reject_icmpv6_code() are called from the
packet path, so BUG_ON in case we try to access an unknown abstracted
ICMP code. This should not happen since we already validate this from
nft_reject_{inet,bridge}_init().

Fixes: 51b0a5d ("netfilter: nft_reject: introduce icmp code abstraction for inet and bridge")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 +-
 net/netfilter/nft_reject.c               | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c26df6787fb0..f31fe7b660a5 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -774,7 +774,7 @@ enum nft_reject_inet_code {
 	NFT_REJECT_ICMPX_ADMIN_PROHIBITED,
 	__NFT_REJECT_ICMPX_MAX
 };
-#define NFT_REJECT_ICMPX_MAX	(__NFT_REJECT_ICMPX_MAX + 1)
+#define NFT_REJECT_ICMPX_MAX	(__NFT_REJECT_ICMPX_MAX - 1)
 
 /**
  * enum nft_reject_attributes - nf_tables reject expression netlink attributes
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ec8a456092a7..57d3e1af5630 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -72,7 +72,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_reject_dump);
 
-static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMP_NET_UNREACH,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMP_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMP_HOST_UNREACH,
@@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmp_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v4[code];
 }
@@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code)
 EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
 
 
-static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = {
 	[NFT_REJECT_ICMPX_NO_ROUTE]		= ICMPV6_NOROUTE,
 	[NFT_REJECT_ICMPX_PORT_UNREACH]		= ICMPV6_PORT_UNREACH,
 	[NFT_REJECT_ICMPX_HOST_UNREACH]		= ICMPV6_ADDR_UNREACH,
@@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
 
 int nft_reject_icmpv6_code(u8 code)
 {
-	if (code > NFT_REJECT_ICMPX_MAX)
-		return -EINVAL;
+	BUG_ON(code > NFT_REJECT_ICMPX_MAX);
 
 	return icmp_code_v6[code];
 }
-- 
cgit v1.2.3


From 66b43081c0bde3171208a7cb52f5807dce4a79e4 Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Wed, 8 Oct 2014 19:55:03 +1100
Subject: cxl: Add userspace header file

This adds a header file for use by userspace programs wanting to interact with
the kernel cxl driver.  It defines structs and magic numbers required for
userspace to interact with devices in /dev/cxl/afuM.N.

Further documentation on this interface is added in a subsequent patch in
Documentation/powerpc/cxl.txt.

It also adds this new userspace header file to Kbuild so it's exported when
doing "make headers_installs".

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 include/uapi/Kbuild      |  1 +
 include/uapi/misc/Kbuild |  2 ++
 include/uapi/misc/cxl.h  | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)
 create mode 100644 include/uapi/misc/Kbuild
 create mode 100644 include/uapi/misc/cxl.h

(limited to 'include/uapi')

diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild
index 81d2106287fe..245aa6e05e6a 100644
--- a/include/uapi/Kbuild
+++ b/include/uapi/Kbuild
@@ -12,3 +12,4 @@ header-y += video/
 header-y += drm/
 header-y += xen/
 header-y += scsi/
+header-y += misc/
diff --git a/include/uapi/misc/Kbuild b/include/uapi/misc/Kbuild
new file mode 100644
index 000000000000..e96cae7d58c9
--- /dev/null
+++ b/include/uapi/misc/Kbuild
@@ -0,0 +1,2 @@
+# misc Header export list
+header-y += cxl.h
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
new file mode 100644
index 000000000000..c232be6ae21f
--- /dev/null
+++ b/include/uapi/misc/cxl.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_MISC_CXL_H
+#define _UAPI_MISC_CXL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* Structs for IOCTLS for userspace to talk to the kernel */
+struct cxl_ioctl_start_work {
+	__u64 flags;
+	__u64 work_element_descriptor;
+	__u64 amr;
+	__s16 num_interrupts;
+	__s16 reserved1;
+	__s32 reserved2;
+	__u64 reserved3;
+	__u64 reserved4;
+	__u64 reserved5;
+	__u64 reserved6;
+};
+#define CXL_START_WORK_AMR		0x0000000000000001ULL
+#define CXL_START_WORK_NUM_IRQS		0x0000000000000002ULL
+#define CXL_START_WORK_ALL		(CXL_START_WORK_AMR |\
+					 CXL_START_WORK_NUM_IRQS)
+
+/* IOCTL numbers */
+#define CXL_MAGIC 0xCA
+#define CXL_IOCTL_START_WORK		_IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work)
+#define CXL_IOCTL_GET_PROCESS_ELEMENT	_IOR(CXL_MAGIC, 0x01, __u32)
+
+/* Events from read() */
+#define CXL_READ_MIN_SIZE 0x1000 /* 4K */
+
+enum cxl_event_type {
+	CXL_EVENT_RESERVED      = 0,
+	CXL_EVENT_AFU_INTERRUPT = 1,
+	CXL_EVENT_DATA_STORAGE  = 2,
+	CXL_EVENT_AFU_ERROR     = 3,
+};
+
+struct cxl_event_header {
+	__u16 type;
+	__u16 size;
+	__u16 process_element;
+	__u16 reserved1;
+};
+
+struct cxl_event_afu_interrupt {
+	__u16 flags;
+	__u16 irq; /* Raised AFU interrupt number */
+	__u32 reserved1;
+};
+
+struct cxl_event_data_storage {
+	__u16 flags;
+	__u16 reserved1;
+	__u32 reserved2;
+	__u64 addr;
+	__u64 dsisr;
+	__u64 reserved3;
+};
+
+struct cxl_event_afu_error {
+	__u16 flags;
+	__u16 reserved1;
+	__u32 reserved2;
+	__u64 error;
+};
+
+struct cxl_event {
+	struct cxl_event_header header;
+	union {
+		struct cxl_event_afu_interrupt irq;
+		struct cxl_event_data_storage fault;
+		struct cxl_event_afu_error afu_error;
+	};
+};
+
+#endif /* _UAPI_MISC_CXL_H */
-- 
cgit v1.2.3


From a9282d01cf357379ce29103cec5e7651a53c634d Mon Sep 17 00:00:00 2001
From: Ian Munsie <imunsie@au1.ibm.com>
Date: Wed, 8 Oct 2014 19:55:05 +1100
Subject: cxl: Add documentation for userspace APIs

This documentation gives an overview of the hardware architecture, userspace
APIs via /dev/cxl/afuM.N and the syfs files. It also adds a MAINTAINERS file
entry for cxl.

Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/ABI/testing/sysfs-class-cxl | 129 ++++++++++
 Documentation/ioctl/ioctl-number.txt      |   1 +
 Documentation/powerpc/00-INDEX            |   2 +
 Documentation/powerpc/cxl.txt             | 379 ++++++++++++++++++++++++++++++
 MAINTAINERS                               |  12 +
 include/uapi/misc/cxl.h                   |   7 +-
 6 files changed, 527 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-cxl
 create mode 100644 Documentation/powerpc/cxl.txt

(limited to 'include/uapi')

diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
new file mode 100644
index 000000000000..554405ec1955
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -0,0 +1,129 @@
+Slave contexts (eg. /sys/class/cxl/afu0.0s):
+
+What:           /sys/class/cxl/<afu>/irqs_max
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                Decimal value of maximum number of interrupts that can be
+                requested by userspace.  The default on probe is the maximum
+                that hardware can support (eg. 2037). Write values will limit
+                userspace applications to that many userspace interrupts. Must
+                be >= irqs_min.
+
+What:           /sys/class/cxl/<afu>/irqs_min
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the minimum number of interrupts that
+                userspace must request on a CXL_START_WORK ioctl. Userspace may
+                omit the num_interrupts field in the START_WORK IOCTL to get
+                this minimum automatically.
+
+What:           /sys/class/cxl/<afu>/mmio_size
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the size of the MMIO space that may be mmaped
+                by userspace.
+
+What:           /sys/class/cxl/<afu>/modes_supported
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                List of the modes this AFU supports. One per line.
+                Valid entries are: "dedicated_process" and "afu_directed"
+
+What:           /sys/class/cxl/<afu>/mode
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                The current mode the AFU is using. Will be one of the modes
+                given in modes_supported. Writing will change the mode
+                provided that no user contexts are attached.
+
+
+What:           /sys/class/cxl/<afu>/prefault_mode
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                Set the mode for prefaulting in segments into the segment table
+                when performing the START_WORK ioctl. Possible values:
+                        none: No prefaulting (default)
+                        work_element_descriptor: Treat the work element
+                                 descriptor as an effective address and
+                                 prefault what it points to.
+                        all: all segments process calling START_WORK maps.
+
+What:           /sys/class/cxl/<afu>/reset
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    write only
+                Writing 1 here will reset the AFU provided there are not
+                contexts active on the AFU.
+
+What:           /sys/class/cxl/<afu>/api_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the current version of the kernel/user API.
+
+What:           /sys/class/cxl/<afu>/api_version_com
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the the lowest version of the userspace API
+                this this kernel supports.
+
+
+
+Master contexts (eg. /sys/class/cxl/afu0.0m)
+
+What:           /sys/class/cxl/<afu>m/mmio_size
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the size of the MMIO space that may be mmaped
+                by userspace. This includes all slave contexts space also.
+
+What:           /sys/class/cxl/<afu>m/pp_mmio_len
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the Per Process MMIO space length.
+
+What:           /sys/class/cxl/<afu>m/pp_mmio_off
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the Per Process MMIO space offset.
+
+
+Card info (eg. /sys/class/cxl/card0)
+
+What:           /sys/class/cxl/<card>/caia_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the CAIA Version the card implements.
+
+What:           /sys/class/cxl/<card>/psl_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the revision level of the PSL.
+
+What:           /sys/class/cxl/<card>/base_image
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the revision level of the base image for devices
+                that support loadable PSLs. For FPGAs this field identifies
+                the image contained in the on-adapter flash which is loaded
+                during the initial program load.
+
+What:           /sys/class/cxl/<card>/image_loaded
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Will return "user" or "factory" depending on the image loaded
+                onto the card.
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7e240a7c9ab1..8136e1fd30fd 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -313,6 +313,7 @@ Code  Seq#(hex)	Include File		Comments
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
 0xB3	00	linux/mmc/ioctl.h
 0xC0	00-0F	linux/usb/iowarrior.h
+0xCA	00-0F	uapi/misc/cxl.h
 0xCB	00-1F	CBM serial IEC bus	in development:
 					<mailto:michael.klein@puffin.lb.shuttle.de>
 0xCD	01	linux/reiserfs_fs.h
diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
index a68784d0a1ee..6fd0e8bb8140 100644
--- a/Documentation/powerpc/00-INDEX
+++ b/Documentation/powerpc/00-INDEX
@@ -11,6 +11,8 @@ bootwrapper.txt
 cpu_features.txt
 	- info on how we support a variety of CPUs with minimal compile-time
 	options.
+cxl.txt
+	- Overview of the CXL driver.
 eeh-pci-error-recovery.txt
 	- info on PCI Bus EEH Error Recovery
 firmware-assisted-dump.txt
diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt
new file mode 100644
index 000000000000..2c71ecc519d9
--- /dev/null
+++ b/Documentation/powerpc/cxl.txt
@@ -0,0 +1,379 @@
+Coherent Accelerator Interface (CXL)
+====================================
+
+Introduction
+============
+
+    The coherent accelerator interface is designed to allow the
+    coherent connection of accelerators (FPGAs and other devices) to a
+    POWER system. These devices need to adhere to the Coherent
+    Accelerator Interface Architecture (CAIA).
+
+    IBM refers to this as the Coherent Accelerator Processor Interface
+    or CAPI. In the kernel it's referred to by the name CXL to avoid
+    confusion with the ISDN CAPI subsystem.
+
+    Coherent in this context means that the accelerator and CPUs can
+    both access system memory directly and with the same effective
+    addresses.
+
+
+Hardware overview
+=================
+
+          POWER8               FPGA
+       +----------+        +---------+
+       |          |        |         |
+       |   CPU    |        |   AFU   |
+       |          |        |         |
+       |          |        |         |
+       |          |        |         |
+       +----------+        +---------+
+       |   PHB    |        |         |
+       |   +------+        |   PSL   |
+       |   | CAPP |<------>|         |
+       +---+------+  PCIE  +---------+
+
+    The POWER8 chip has a Coherently Attached Processor Proxy (CAPP)
+    unit which is part of the PCIe Host Bridge (PHB). This is managed
+    by Linux by calls into OPAL. Linux doesn't directly program the
+    CAPP.
+
+    The FPGA (or coherently attached device) consists of two parts.
+    The POWER Service Layer (PSL) and the Accelerator Function Unit
+    (AFU). The AFU is used to implement specific functionality behind
+    the PSL. The PSL, among other things, provides memory address
+    translation services to allow each AFU direct access to userspace
+    memory.
+
+    The AFU is the core part of the accelerator (eg. the compression,
+    crypto etc function). The kernel has no knowledge of the function
+    of the AFU. Only userspace interacts directly with the AFU.
+
+    The PSL provides the translation and interrupt services that the
+    AFU needs. This is what the kernel interacts with. For example, if
+    the AFU needs to read a particular effective address, it sends
+    that address to the PSL, the PSL then translates it, fetches the
+    data from memory and returns it to the AFU. If the PSL has a
+    translation miss, it interrupts the kernel and the kernel services
+    the fault. The context to which this fault is serviced is based on
+    who owns that acceleration function.
+
+
+AFU Modes
+=========
+
+    There are two programming modes supported by the AFU. Dedicated
+    and AFU directed. AFU may support one or both modes.
+
+    When using dedicated mode only one MMU context is supported. In
+    this mode, only one userspace process can use the accelerator at
+    time.
+
+    When using AFU directed mode, up to 16K simultaneous contexts can
+    be supported. This means up to 16K simultaneous userspace
+    applications may use the accelerator (although specific AFUs may
+    support fewer). In this mode, the AFU sends a 16 bit context ID
+    with each of its requests. This tells the PSL which context is
+    associated with each operation. If the PSL can't translate an
+    operation, the ID can also be accessed by the kernel so it can
+    determine the userspace context associated with an operation.
+
+
+MMIO space
+==========
+
+    A portion of the accelerator MMIO space can be directly mapped
+    from the AFU to userspace. Either the whole space can be mapped or
+    just a per context portion. The hardware is self describing, hence
+    the kernel can determine the offset and size of the per context
+    portion.
+
+
+Interrupts
+==========
+
+    AFUs may generate interrupts that are destined for userspace. These
+    are received by the kernel as hardware interrupts and passed onto
+    userspace by a read syscall documented below.
+
+    Data storage faults and error interrupts are handled by the kernel
+    driver.
+
+
+Work Element Descriptor (WED)
+=============================
+
+    The WED is a 64-bit parameter passed to the AFU when a context is
+    started. Its format is up to the AFU hence the kernel has no
+    knowledge of what it represents. Typically it will be the
+    effective address of a work queue or status block where the AFU
+    and userspace can share control and status information.
+
+
+
+
+User API
+========
+
+    For AFUs operating in AFU directed mode, two character device
+    files will be created. /dev/cxl/afu0.0m will correspond to a
+    master context and /dev/cxl/afu0.0s will correspond to a slave
+    context. Master contexts have access to the full MMIO space an
+    AFU provides. Slave contexts have access to only the per process
+    MMIO space an AFU provides.
+
+    For AFUs operating in dedicated process mode, the driver will
+    only create a single character device per AFU called
+    /dev/cxl/afu0.0d. This will have access to the entire MMIO space
+    that the AFU provides (like master contexts in AFU directed).
+
+    The types described below are defined in include/uapi/misc/cxl.h
+
+    The following file operations are supported on both slave and
+    master devices.
+
+
+open
+----
+
+    Opens the device and allocates a file descriptor to be used with
+    the rest of the API.
+
+    A dedicated mode AFU only has one context and only allows the
+    device to be opened once.
+
+    An AFU directed mode AFU can have many contexts, the device can be
+    opened once for each context that is available.
+
+    When all available contexts are allocated the open call will fail
+    and return -ENOSPC.
+
+    Note: IRQs need to be allocated for each context, which may limit
+          the number of contexts that can be created, and therefore
+          how many times the device can be opened. The POWER8 CAPP
+          supports 2040 IRQs and 3 are used by the kernel, so 2037 are
+          left. If 1 IRQ is needed per context, then only 2037
+          contexts can be allocated. If 4 IRQs are needed per context,
+          then only 2037/4 = 509 contexts can be allocated.
+
+
+ioctl
+-----
+
+    CXL_IOCTL_START_WORK:
+        Starts the AFU context and associates it with the current
+        process. Once this ioctl is successfully executed, all memory
+        mapped into this process is accessible to this AFU context
+        using the same effective addresses. No additional calls are
+        required to map/unmap memory. The AFU memory context will be
+        updated as userspace allocates and frees memory. This ioctl
+        returns once the AFU context is started.
+
+        Takes a pointer to a struct cxl_ioctl_start_work:
+
+                struct cxl_ioctl_start_work {
+                        __u64 flags;
+                        __u64 work_element_descriptor;
+                        __u64 amr;
+                        __s16 num_interrupts;
+                        __s16 reserved1;
+                        __s32 reserved2;
+                        __u64 reserved3;
+                        __u64 reserved4;
+                        __u64 reserved5;
+                        __u64 reserved6;
+                };
+
+            flags:
+                Indicates which optional fields in the structure are
+                valid.
+
+            work_element_descriptor:
+                The Work Element Descriptor (WED) is a 64-bit argument
+                defined by the AFU. Typically this is an effective
+                address pointing to an AFU specific structure
+                describing what work to perform.
+
+            amr:
+                Authority Mask Register (AMR), same as the powerpc
+                AMR. This field is only used by the kernel when the
+                corresponding CXL_START_WORK_AMR value is specified in
+                flags. If not specified the kernel will use a default
+                value of 0.
+
+            num_interrupts:
+                Number of userspace interrupts to request. This field
+                is only used by the kernel when the corresponding
+                CXL_START_WORK_NUM_IRQS value is specified in flags.
+                If not specified the minimum number required by the
+                AFU will be allocated. The min and max number can be
+                obtained from sysfs.
+
+            reserved fields:
+                For ABI padding and future extensions
+
+    CXL_IOCTL_GET_PROCESS_ELEMENT:
+        Get the current context id, also known as the process element.
+        The value is returned from the kernel as a __u32.
+
+
+mmap
+----
+
+    An AFU may have an MMIO space to facilitate communication with the
+    AFU. If it does, the MMIO space can be accessed via mmap. The size
+    and contents of this area are specific to the particular AFU. The
+    size can be discovered via sysfs.
+
+    In AFU directed mode, master contexts are allowed to map all of
+    the MMIO space and slave contexts are allowed to only map the per
+    process MMIO space associated with the context. In dedicated
+    process mode the entire MMIO space can always be mapped.
+
+    This mmap call must be done after the START_WORK ioctl.
+
+    Care should be taken when accessing MMIO space. Only 32 and 64-bit
+    accesses are supported by POWER8. Also, the AFU will be designed
+    with a specific endianness, so all MMIO accesses should consider
+    endianness (recommend endian(3) variants like: le64toh(),
+    be64toh() etc). These endian issues equally apply to shared memory
+    queues the WED may describe.
+
+
+read
+----
+
+    Reads events from the AFU. Blocks if no events are pending
+    (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
+    unrecoverable error or if the card is removed.
+
+    read() will always return an integral number of events.
+
+    The buffer passed to read() must be at least 4K bytes.
+
+    The result of the read will be a buffer of one or more events,
+    each event is of type struct cxl_event, of varying size.
+
+            struct cxl_event {
+                    struct cxl_event_header header;
+                    union {
+                            struct cxl_event_afu_interrupt irq;
+                            struct cxl_event_data_storage fault;
+                            struct cxl_event_afu_error afu_error;
+                    };
+            };
+
+    The struct cxl_event_header is defined as:
+
+            struct cxl_event_header {
+                    __u16 type;
+                    __u16 size;
+                    __u16 process_element;
+                    __u16 reserved1;
+            };
+
+        type:
+            This defines the type of event. The type determines how
+            the rest of the event is structured. These types are
+            described below and defined by enum cxl_event_type.
+
+        size:
+            This is the size of the event in bytes including the
+            struct cxl_event_header. The start of the next event can
+            be found at this offset from the start of the current
+            event.
+
+        process_element:
+            Context ID of the event.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_AFU_INTERRUPT then the event
+    structure is defined as:
+
+            struct cxl_event_afu_interrupt {
+                    __u16 flags;
+                    __u16 irq; /* Raised AFU interrupt number */
+                    __u32 reserved1;
+            };
+
+        flags:
+            These flags indicate which optional fields are present
+            in this struct. Currently all fields are mandatory.
+
+        irq:
+            The IRQ number sent by the AFU.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_DATA_STORAGE then the event
+    structure is defined as:
+
+            struct cxl_event_data_storage {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 addr;
+                    __u64 dsisr;
+                    __u64 reserved3;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are mandatory.
+
+        address:
+            The address that the AFU unsuccessfully attempted to
+            access. Valid accesses will be handled transparently by the
+            kernel but invalid accesses will generate this event.
+
+        dsisr:
+            This field gives information on the type of fault. It is a
+            copy of the DSISR from the PSL hardware when the address
+            fault occurred. The form of the DSISR is as defined in the
+            CAIA.
+
+        reserved fields:
+            For future extensions
+
+    If the event type is CXL_EVENT_AFU_ERROR then the event structure
+    is defined as:
+
+            struct cxl_event_afu_error {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 error;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are Mandatory.
+
+        error:
+            Error status from the AFU. Defined by the AFU.
+
+        reserved fields:
+            For future extensions and padding
+
+Sysfs Class
+===========
+
+    A cxl sysfs class is added under /sys/class/cxl to facilitate
+    enumeration and tuning of the accelerators. Its layout is
+    described in Documentation/ABI/testing/sysfs-class-cxl
+
+Udev rules
+==========
+
+    The following udev rules could be used to create a symlink to the
+    most logical chardev to use in any programming mode (afuX.Yd for
+    dedicated, afuX.Ys for afu directed), since the API is virtually
+    identical for each:
+
+	SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
+	SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
+	                  KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
diff --git a/MAINTAINERS b/MAINTAINERS
index 809ecd680d88..facc2198eaba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2711,6 +2711,18 @@ W:	http://www.chelsio.com
 S:	Supported
 F:	drivers/net/ethernet/chelsio/cxgb4vf/
 
+CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
+M:	Ian Munsie <imunsie@au1.ibm.com>
+M:	Michael Neuling <mikey@neuling.org>
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Supported
+F:	drivers/misc/cxl/
+F:	include/misc/cxl.h
+F:	include/uapi/misc/cxl.h
+F:	Documentation/powerpc/cxl.txt
+F:	Documentation/powerpc/cxl.txt
+F:	Documentation/ABI/testing/sysfs-class-cxl
+
 STMMAC ETHERNET DRIVER
 M:	Giuseppe Cavallaro <peppe.cavallaro@st.com>
 L:	netdev@vger.kernel.org
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index c232be6ae21f..cd6d789b73ec 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -13,7 +13,7 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
-/* Structs for IOCTLS for userspace to talk to the kernel */
+
 struct cxl_ioctl_start_work {
 	__u64 flags;
 	__u64 work_element_descriptor;
@@ -26,19 +26,20 @@ struct cxl_ioctl_start_work {
 	__u64 reserved5;
 	__u64 reserved6;
 };
+
 #define CXL_START_WORK_AMR		0x0000000000000001ULL
 #define CXL_START_WORK_NUM_IRQS		0x0000000000000002ULL
 #define CXL_START_WORK_ALL		(CXL_START_WORK_AMR |\
 					 CXL_START_WORK_NUM_IRQS)
 
-/* IOCTL numbers */
+/* ioctl numbers */
 #define CXL_MAGIC 0xCA
 #define CXL_IOCTL_START_WORK		_IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work)
 #define CXL_IOCTL_GET_PROCESS_ELEMENT	_IOR(CXL_MAGIC, 0x01, __u32)
 
-/* Events from read() */
 #define CXL_READ_MIN_SIZE 0x1000 /* 4K */
 
+/* Events from read() */
 enum cxl_event_type {
 	CXL_EVENT_RESERVED      = 0,
 	CXL_EVENT_AFU_INTERRUPT = 1,
-- 
cgit v1.2.3


From 8070361799ae1e3f4ef347bd10f0a508ac10acfb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 6 Oct 2014 17:53:53 +0200
Subject: s390: add support for vector extension

The vector extension introduces 32 128-bit vector registers and a set of
instruction to operate on the vector registers.

The kernel can control the use of vector registers for the problem state
program with a bit in control register 0. Once enabled for a process the
kernel needs to retain the content of the vector registers on context
switch. The signal frame is extended to include the vector registers.
Two new register sets NT_S390_VXRS_LOW and NT_S390_VXRS_HIGH are added
to the regset interface for the debugger and core dumps.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/elf.h             |   1 +
 arch/s390/include/asm/lowcore.h         |  10 +-
 arch/s390/include/asm/nmi.h             |   2 +-
 arch/s390/include/asm/processor.h       |   1 +
 arch/s390/include/asm/setup.h           |   3 +
 arch/s390/include/asm/switch_to.h       |  48 +++++-
 arch/s390/include/uapi/asm/sigcontext.h |  20 ++-
 arch/s390/include/uapi/asm/types.h      |   4 +
 arch/s390/include/uapi/asm/ucontext.h   |  15 +-
 arch/s390/kernel/compat_linux.h         |   9 +
 arch/s390/kernel/compat_signal.c        | 212 +++++++++++++++++------
 arch/s390/kernel/early.c                |   2 +
 arch/s390/kernel/entry.h                |   3 +
 arch/s390/kernel/nmi.c                  |  16 ++
 arch/s390/kernel/pgm_check.S            |   2 +-
 arch/s390/kernel/processor.c            |   2 +-
 arch/s390/kernel/ptrace.c               | 249 +++++++++++++++++++++------
 arch/s390/kernel/setup.c                |   9 +
 arch/s390/kernel/signal.c               | 296 +++++++++++++++++++++++++-------
 arch/s390/kernel/smp.c                  |   3 +
 arch/s390/kernel/traps.c                |  82 +++++++++
 include/uapi/linux/elf.h                |   2 +
 22 files changed, 804 insertions(+), 187 deletions(-)

(limited to 'include/uapi')

diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 78f4f8711d58..27735ae06a78 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -102,6 +102,7 @@
 #define HWCAP_S390_ETF3EH	256
 #define HWCAP_S390_HIGH_GPRS	512
 #define HWCAP_S390_TE		1024
+#define HWCAP_S390_VXRS		2048
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 4349197ab9df..d812cf1a8177 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -310,7 +310,10 @@ struct _lowcore {
 
 	/* Extended facility list */
 	__u64	stfle_fac_list[32];		/* 0x0f00 */
-	__u8	pad_0x1000[0x11b8-0x1000];	/* 0x1000 */
+	__u8	pad_0x1000[0x11b0-0x1000];	/* 0x1000 */
+
+	/* Pointer to vector register save area */
+	__u64	vector_save_area_addr;		/* 0x11b0 */
 
 	/* 64 bit extparam used for pfault/diag 250: defined by architecture */
 	__u64	ext_params2;			/* 0x11B8 */
@@ -334,9 +337,10 @@ struct _lowcore {
 
 	/* Transaction abort diagnostic block */
 	__u8	pgm_tdb[256];			/* 0x1800 */
+	__u8	pad_0x1900[0x1c00-0x1900];	/* 0x1900 */
 
-	/* align to the top of the prefix area */
-	__u8	pad_0x1900[0x2000-0x1900];	/* 0x1900 */
+	/* Software defined save area for vector registers */
+	__u8	vector_save_area[1024];		/* 0x1c00 */
 } __packed;
 
 #endif /* CONFIG_32BIT */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 35f8ec185616..3027a5a72b74 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -38,7 +38,7 @@ struct mci {
 	__u32 pm :  1; /* 22 psw program mask and cc validity */
 	__u32 ia :  1; /* 23 psw instruction address validity */
 	__u32 fa :  1; /* 24 failing storage address validity */
-	__u32	 :  1; /* 25 */
+	__u32 vr :  1; /* 25 vector register validity */
 	__u32 ec :  1; /* 26 external damage code validity */
 	__u32 fp :  1; /* 27 floating point register validity */
 	__u32 gr :  1; /* 28 general register validity */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 3d0871058306..d559bdb03d18 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -117,6 +117,7 @@ struct thread_struct {
 	int ri_signum;
 #ifdef CONFIG_64BIT
 	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
+	__vector128 *vxrs;		/* Vector register save area */
 #endif
 };
 
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index dbde7c236577..7736fdd72595 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -56,6 +56,7 @@ extern void detect_memory_memblock(void);
 #define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
 #define MACHINE_FLAG_TE		(1UL << 15)
 #define MACHINE_FLAG_TLB_LC	(1UL << 17)
+#define MACHINE_FLAG_VX		(1UL << 18)
 
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -78,6 +79,7 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TOPOLOGY	(0)
 #define MACHINE_HAS_TE		(0)
 #define MACHINE_HAS_TLB_LC	(0)
+#define MACHINE_HAS_VX		(0)
 #else /* CONFIG_64BIT */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
@@ -90,6 +92,7 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
 #endif /* CONFIG_64BIT */
 
 /*
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 18ea9e3f8142..0e0109578021 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -103,6 +103,48 @@ static inline void restore_fp_regs(freg_t *fprs)
 	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
 }
 
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(addrtype *) vxrs) : : "1");
+}
+
+static inline void restore_vx_regs(__vector128 *vxrs)
+{
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+		: : "Q" (*(addrtype *) vxrs) : "1");
+}
+
+static inline void save_fp_vx_regs(struct task_struct *task)
+{
+#ifdef CONFIG_64BIT
+	if (task->thread.vxrs)
+		save_vx_regs(task->thread.vxrs);
+	else
+#endif
+	save_fp_regs(task->thread.fp_regs.fprs);
+}
+
+static inline void restore_fp_vx_regs(struct task_struct *task)
+{
+#ifdef CONFIG_64BIT
+	if (task->thread.vxrs)
+		restore_vx_regs(task->thread.vxrs);
+	else
+#endif
+	restore_fp_regs(task->thread.fp_regs.fprs);
+}
+
 static inline void save_access_regs(unsigned int *acrs)
 {
 	typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -120,16 +162,16 @@ static inline void restore_access_regs(unsigned int *acrs)
 #define switch_to(prev,next,last) do {					\
 	if (prev->mm) {							\
 		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
-		save_fp_regs(prev->thread.fp_regs.fprs);		\
+		save_fp_vx_regs(prev);					\
 		save_access_regs(&prev->thread.acrs[0]);		\
 		save_ri_cb(prev->thread.ri_cb);				\
 	}								\
 	if (next->mm) {							\
+		update_cr_regs(next);					\
 		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
-		restore_fp_regs(next->thread.fp_regs.fprs);		\
+		restore_fp_vx_regs(next);				\
 		restore_access_regs(&next->thread.acrs[0]);		\
 		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
-		update_cr_regs(next);					\
 	}								\
 	prev = __switch_to(prev,next);					\
 } while (0)
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
index b30de9c01bbe..5f0b8d7ddb0b 100644
--- a/arch/s390/include/uapi/asm/sigcontext.h
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -7,10 +7,14 @@
 #define _ASM_S390_SIGCONTEXT_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 
-#define __NUM_GPRS 16
-#define __NUM_FPRS 16
-#define __NUM_ACRS 16
+#define __NUM_GPRS		16
+#define __NUM_FPRS		16
+#define __NUM_ACRS		16
+#define __NUM_VXRS		32
+#define __NUM_VXRS_LOW		16
+#define __NUM_VXRS_HIGH		16
 
 #ifndef __s390x__
 
@@ -59,6 +63,16 @@ typedef struct
 	_s390_fp_regs     fpregs;
 } _sigregs;
 
+typedef struct
+{
+#ifndef __s390x__
+	unsigned long gprs_high[__NUM_GPRS];
+#endif
+	unsigned long long vxrs_low[__NUM_VXRS_LOW];
+	__vector128 vxrs_high[__NUM_VXRS_HIGH];
+	unsigned char __reserved[128];
+} _sigregs_ext;
+
 struct sigcontext
 {
 	unsigned long	oldmask[_SIGCONTEXT_NSIG_WORDS];
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index 038f2b9178a4..3c3951e3415b 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -17,6 +17,10 @@
 typedef unsigned long addr_t; 
 typedef __signed__ long saddr_t;
 
+typedef struct {
+	__u32 u[4];
+} __vector128;
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
index 3e077b2a4705..64a69aa5dde0 100644
--- a/arch/s390/include/uapi/asm/ucontext.h
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -7,10 +7,15 @@
 #ifndef _ASM_S390_UCONTEXT_H
 #define _ASM_S390_UCONTEXT_H
 
-#define UC_EXTENDED	0x00000001
-
-#ifndef __s390x__
+#define UC_GPRS_HIGH	1	/* uc_mcontext_ext has valid high gprs */
+#define UC_VXRS		2	/* uc_mcontext_ext has valid vector regs */
 
+/*
+ * The struct ucontext_extended describes how the registers are stored
+ * on a rt signal frame. Please note that the structure is not fixed,
+ * if new CPU registers are added to the user state the size of the
+ * struct ucontext_extended will increase.
+ */
 struct ucontext_extended {
 	unsigned long	  uc_flags;
 	struct ucontext  *uc_link;
@@ -19,11 +24,9 @@ struct ucontext_extended {
 	sigset_t	  uc_sigmask;
 	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
 	unsigned char	  __unused[128 - sizeof(sigset_t)];
-	unsigned long	  uc_gprs_high[16];
+	_sigregs_ext	  uc_mcontext_ext;
 };
 
-#endif
-
 struct ucontext {
 	unsigned long	  uc_flags;
 	struct ucontext  *uc_link;
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 70d4b7c4beaa..a0a886c04977 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -50,6 +50,14 @@ typedef struct
 	_s390_fp_regs32     fpregs;
 } _sigregs32;
 
+typedef struct
+{
+	__u32 gprs_high[__NUM_GPRS];
+	__u64 vxrs_low[__NUM_VXRS_LOW];
+	__vector128 vxrs_high[__NUM_VXRS_HIGH];
+	__u8 __reserved[128];
+} _sigregs_ext32;
+
 #define _SIGCONTEXT_NSIG32	64
 #define _SIGCONTEXT_NSIG_BPW32	32
 #define __SIGNAL_FRAMESIZE32	96
@@ -72,6 +80,7 @@ struct ucontext32 {
 	compat_sigset_t		uc_sigmask;
 	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
 	unsigned char		__unused[128 - sizeof(compat_sigset_t)];
+	_sigregs_ext32		uc_mcontext_ext;
 };
 
 struct stat64_emu31;
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 598b0b42668b..009f5eb11125 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -36,17 +36,16 @@ typedef struct
 	struct sigcontext32 sc;
 	_sigregs32 sregs;
 	int signo;
-	__u32 gprs_high[NUM_GPRS];
-	__u8 retcode[S390_SYSCALL_SIZE];
+	_sigregs_ext32 sregs_ext;
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
 } sigframe32;
 
 typedef struct 
 {
 	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
-	__u8 retcode[S390_SYSCALL_SIZE];
+	__u16 svc_insn;
 	compat_siginfo_t info;
 	struct ucontext32 uc;
-	__u32 gprs_high[NUM_GPRS];
 } rt_sigframe32;
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
@@ -151,6 +150,38 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 	return err ? -EFAULT : 0;
 }
 
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+	int i;
+
+	save_access_regs(current->thread.acrs);
+	save_fp_ctl(&current->thread.fp_regs.fpc);
+	if (current->thread.vxrs) {
+		save_vx_regs(current->thread.vxrs);
+		for (i = 0; i < __NUM_FPRS; i++)
+			current->thread.fp_regs.fprs[i] =
+				*(freg_t *)(current->thread.vxrs + i);
+	} else
+		save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+	int i;
+
+	restore_access_regs(current->thread.acrs);
+	/* restore_fp_ctl is done in restore_sigregs */
+	if (current->thread.vxrs) {
+		for (i = 0; i < __NUM_FPRS; i++)
+			*(freg_t *)(current->thread.vxrs + i) =
+				current->thread.fp_regs.fprs[i];
+		restore_vx_regs(current->thread.vxrs);
+	} else
+		restore_fp_regs(current->thread.fp_regs.fprs);
+}
+
 static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 {
 	_sigregs32 user_sregs;
@@ -163,11 +194,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 		(__u32)(regs->psw.mask & PSW_MASK_BA);
 	for (i = 0; i < NUM_GPRS; i++)
 		user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
-	save_access_regs(current->thread.acrs);
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	save_fp_regs(current->thread.fp_regs.fprs);
 	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
 	       sizeof(user_sregs.fpregs));
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
@@ -207,37 +235,67 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-	restore_access_regs(current->thread.acrs);
 
 	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
 	       sizeof(current->thread.fp_regs));
 
-	restore_fp_regs(current->thread.fp_regs.fprs);
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
 }
 
-static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
+static int save_sigregs_ext32(struct pt_regs *regs,
+			      _sigregs_ext32 __user *sregs_ext)
 {
 	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
+	/* Save high gprs to signal stack */
 	for (i = 0; i < NUM_GPRS; i++)
 		gprs_high[i] = regs->gprs[i] >> 32;
-	if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high)))
+	if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+			   sizeof(sregs_ext->gprs_high)))
 		return -EFAULT;
+
+	/* Save vector registers to signal stack */
+	if (current->thread.vxrs) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
 	return 0;
 }
 
-static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
+static int restore_sigregs_ext32(struct pt_regs *regs,
+				 _sigregs_ext32 __user *sregs_ext)
 {
 	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
 
-	if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high)))
+	/* Restore high gprs from signal stack */
+	if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+			     sizeof(&sregs_ext->gprs_high)))
 		return -EFAULT;
 	for (i = 0; i < NUM_GPRS; i++)
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+	/* Restore vector registers from signal stack */
+	if (current->thread.vxrs) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+	}
 	return 0;
 }
 
@@ -252,8 +310,9 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
 	set_current_blocked(&set);
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
-	if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+	if (restore_sigregs_ext32(regs, &frame->sregs_ext))
 		goto badframe;
+	load_sigregs();
 	return regs->gprs[2];
 badframe:
 	force_sig(SIGSEGV, current);
@@ -269,12 +328,13 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 	set_current_blocked(&set);
+	if (compat_restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
-	if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+	if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
 		goto badframe;
-	if (compat_restore_altstack(&frame->uc.uc_stack))
-		goto badframe; 
+	load_sigregs();
 	return regs->gprs[2];
 badframe:
 	force_sig(SIGSEGV, current);
@@ -324,37 +384,64 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 			 struct pt_regs *regs)
 {
 	int sig = ksig->sig;
-	sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(sigframe32));
-
+	sigframe32 __user *frame;
+	struct sigcontext32 sc;
+	unsigned long restorer;
+	size_t frame_size;
+
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+	if (!MACHINE_HAS_VX)
+		frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+			      sizeof(frame->sregs_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
 
-	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+		return -EFAULT;
+
+	/* Create struct sigcontext32 on the signal stack */
+	memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32);
+	sc.sregs = (__u32)(unsigned long __force) &frame->sregs;
+	if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
 		return -EFAULT;
 
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create _sigregs32 on the signal stack */
 	if (save_sigregs32(regs, &frame->sregs))
 		return -EFAULT;
-	if (save_sigregs_gprs_high(regs, frame->gprs_high))
+
+	/* Place signal number on stack to allow backtrace from handler.  */
+	if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
 		return -EFAULT;
-	if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs))
+
+	/* Create _sigregs_ext32 on the signal stack */
+	if (save_sigregs_ext32(regs, &frame->sregs_ext))
 		return -EFAULT;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-		regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
-		regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
-		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
-			       (u16 __force __user *)(frame->retcode)))
+		/* Signal frames without vectors registers are short ! */
+		__u16 __user *svc = (void *) frame + frame_size - 2;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
+		restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
         }
 
-	/* Set up backchain. */
-	if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
-		return -EFAULT;
-
 	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
 	regs->gprs[15] = (__force __u64) frame;
 	/* Force 31 bit amode and default user address space control. */
 	regs->psw.mask = PSW_MASK_BA |
@@ -375,50 +462,69 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 		regs->gprs[6] = task_thread_info(current)->last_break;
 	}
 
-	/* Place signal number on stack to allow backtrace from handler.  */
-	if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
-		return -EFAULT;
 	return 0;
 }
 
 static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
 			    struct pt_regs *regs)
 {
-	int err = 0;
-	rt_sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe32));
-
+	rt_sigframe32 __user *frame;
+	unsigned long restorer;
+	size_t frame_size;
+	u32 uc_flags;
+
+	frame_size = sizeof(*frame) -
+		     sizeof(frame->uc.uc_mcontext_ext.__reserved);
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it
+	 */
+	uc_flags = UC_GPRS_HIGH;
+	if (MACHINE_HAS_VX) {
+		if (current->thread.vxrs)
+			uc_flags |= UC_VXRS;
+	} else
+		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
 
-	if (copy_siginfo_to_user32(&frame->info, &ksig->info))
-		return -EFAULT;
-
-	/* Create the ucontext.  */
-	err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
-	err |= save_sigregs32(regs, &frame->uc.uc_mcontext);
-	err |= save_sigregs_gprs_high(regs, frame->gprs_high);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
 		return -EFAULT;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-		regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
-		regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
-		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-			       (u16 __force __user *)(frame->retcode)))
+		__u16 __user *svc = &frame->svc_insn;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
 			return -EFAULT;
+		restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
 	}
 
-	/* Set up backchain. */
-	if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(0, &frame->uc.uc_link) ||
+	    __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+	    save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
 		return -EFAULT;
 
 	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
 	regs->gprs[15] = (__force __u64) frame;
 	/* Force 31 bit amode and default user address space control. */
 	regs->psw.mask = PSW_MASK_BA |
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index f6c66b5d0cfa..cef2879edff3 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -392,6 +392,8 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
 	if (test_facility(51))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+	if (test_facility(129))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
 #endif
 }
 
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index cd68869f9504..0554b9771c9f 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -21,6 +21,8 @@ void psw_idle(struct s390_idle_data *, unsigned long);
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
+int alloc_vector_registers(struct task_struct *tsk);
+
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
 
@@ -43,6 +45,7 @@ void special_op_exception(struct pt_regs *regs);
 void specification_exception(struct pt_regs *regs);
 void transaction_exception(struct pt_regs *regs);
 void translation_exception(struct pt_regs *regs);
+void vector_exception(struct pt_regs *regs);
 
 void do_per_trap(struct pt_regs *regs);
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 210e1285f75a..db96b418160a 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -20,6 +20,7 @@
 #include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
+#include <asm/switch_to.h>
 
 struct mcck_struct {
 	int kill_task;
@@ -163,6 +164,21 @@ static int notrace s390_revalidate_registers(struct mci *mci)
 			"	ld	15,120(%0)\n"
 			: : "a" (fpt_save_area));
 	}
+
+#ifdef CONFIG_64BIT
+	/* Revalidate vector registers */
+	if (MACHINE_HAS_VX && current->thread.vxrs) {
+		if (!mci->vr) {
+			/*
+			 * Vector registers can't be restored and therefore
+			 * the process needs to be terminated.
+			 */
+			kill_task = 1;
+		}
+		restore_vx_regs((__vector128 *)
+				S390_lowcore.vector_save_area_addr);
+	}
+#endif
 	/* Revalidate access registers */
 	asm volatile(
 		"	lam	0,15,0(%0)"
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 813ec7260878..f6f8886399f6 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -49,7 +49,7 @@ PGM_CHECK_DEFAULT			/* 17 */
 PGM_CHECK_64BIT(transaction_exception)	/* 18 */
 PGM_CHECK_DEFAULT			/* 19 */
 PGM_CHECK_DEFAULT			/* 1a */
-PGM_CHECK_DEFAULT			/* 1b */
+PGM_CHECK_64BIT(vector_exception)	/* 1b */
 PGM_CHECK(space_switch_exception)	/* 1c */
 PGM_CHECK(hfp_sqrt_exception)		/* 1d */
 PGM_CHECK_DEFAULT			/* 1e */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 32587cc76306..edefead3b43a 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -39,7 +39,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
-		"edat", "etf3eh", "highgprs", "te"
+		"edat", "etf3eh", "highgprs", "te", "vx"
 	};
 	unsigned long n = (unsigned long) v - 1;
 	int i;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index fe99d6b3f185..0ecfdb3c6f8e 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -38,15 +38,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
-enum s390_regset {
-	REGSET_GENERAL,
-	REGSET_FP,
-	REGSET_LAST_BREAK,
-	REGSET_TDB,
-	REGSET_SYSTEM_CALL,
-	REGSET_GENERAL_EXTENDED,
-};
-
 void update_cr_regs(struct task_struct *task)
 {
 	struct pt_regs *regs = task_pt_regs(task);
@@ -55,27 +46,39 @@ void update_cr_regs(struct task_struct *task)
 
 #ifdef CONFIG_64BIT
 	/* Take care of the enable/disable of transactional execution. */
-	if (MACHINE_HAS_TE) {
+	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
 		unsigned long cr, cr_new;
 
 		__ctl_store(cr, 0, 0);
-		/* Set or clear transaction execution TXC bit 8. */
-		cr_new = cr | (1UL << 55);
-		if (task->thread.per_flags & PER_FLAG_NO_TE)
-			cr_new &= ~(1UL << 55);
+		cr_new = cr;
+		if (MACHINE_HAS_TE) {
+			/* Set or clear transaction execution TXC bit 8. */
+			cr_new |= (1UL << 55);
+			if (task->thread.per_flags & PER_FLAG_NO_TE)
+				cr_new &= ~(1UL << 55);
+		}
+		if (MACHINE_HAS_VX) {
+			/* Enable/disable of vector extension */
+			cr_new &= ~(1UL << 17);
+			if (task->thread.vxrs)
+				cr_new |= (1UL << 17);
+		}
 		if (cr_new != cr)
 			__ctl_load(cr_new, 0, 0);
-		/* Set or clear transaction execution TDC bits 62 and 63. */
-		__ctl_store(cr, 2, 2);
-		cr_new = cr & ~3UL;
-		if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
-			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
-				cr_new |= 1UL;
-			else
-				cr_new |= 2UL;
+		if (MACHINE_HAS_TE) {
+			/* Set/clear transaction execution TDC bits 62/63. */
+			__ctl_store(cr, 2, 2);
+			cr_new = cr & ~3UL;
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+				if (task->thread.per_flags &
+				    PER_FLAG_TE_ABORT_RAND_TEND)
+					cr_new |= 1UL;
+				else
+					cr_new |= 2UL;
+			}
+			if (cr_new != cr)
+				__ctl_load(cr_new, 2, 2);
 		}
-		if (cr_new != cr)
-			__ctl_load(cr_new, 2, 2);
 	}
 #endif
 	/* Copy user specified PER registers */
@@ -926,7 +929,15 @@ static int s390_fpregs_get(struct task_struct *target,
 		save_fp_ctl(&target->thread.fp_regs.fpc);
 		save_fp_regs(target->thread.fp_regs.fprs);
 	}
+#ifdef CONFIG_64BIT
+	else if (target->thread.vxrs) {
+		int i;
 
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			target->thread.fp_regs.fprs[i] =
+				*(freg_t *)(target->thread.vxrs + i);
+	}
+#endif
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.fp_regs, 0, -1);
 }
@@ -960,9 +971,20 @@ static int s390_fpregs_set(struct task_struct *target,
 					target->thread.fp_regs.fprs,
 					offsetof(s390_fp_regs, fprs), -1);
 
-	if (rc == 0 && target == current) {
-		restore_fp_ctl(&target->thread.fp_regs.fpc);
-		restore_fp_regs(target->thread.fp_regs.fprs);
+	if (rc == 0) {
+		if (target == current) {
+			restore_fp_ctl(&target->thread.fp_regs.fpc);
+			restore_fp_regs(target->thread.fp_regs.fprs);
+		}
+#ifdef CONFIG_64BIT
+		else if (target->thread.vxrs) {
+			int i;
+
+			for (i = 0; i < __NUM_VXRS_LOW; i++)
+				*(freg_t *)(target->thread.vxrs + i) =
+					target->thread.fp_regs.fprs[i];
+		}
+#endif
 	}
 
 	return rc;
@@ -1018,6 +1040,95 @@ static int s390_tdb_set(struct task_struct *target,
 	return 0;
 }
 
+static int s390_vxrs_active(struct task_struct *target,
+			      const struct user_regset *regset)
+{
+	return !!target->thread.vxrs;
+}
+
+static int s390_vxrs_low_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	if (target->thread.vxrs) {
+		if (target == current)
+			save_vx_regs(target->thread.vxrs);
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+	} else
+		memset(vxrs, 0, sizeof(vxrs));
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i, rc;
+
+	if (!target->thread.vxrs) {
+		rc = alloc_vector_registers(target);
+		if (rc)
+			return rc;
+	} else if (target == current)
+		save_vx_regs(target->thread.vxrs);
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+	if (rc == 0) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
+		if (target == current)
+			restore_vx_regs(target->thread.vxrs);
+	}
+
+	return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      void *kbuf, void __user *ubuf)
+{
+	__vector128 vxrs[__NUM_VXRS_HIGH];
+
+	if (target->thread.vxrs) {
+		if (target == current)
+			save_vx_regs(target->thread.vxrs);
+		memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+		       sizeof(vxrs));
+	} else
+		memset(vxrs, 0, sizeof(vxrs));
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	int rc;
+
+	if (!target->thread.vxrs) {
+		rc = alloc_vector_registers(target);
+		if (rc)
+			return rc;
+	} else if (target == current)
+		save_vx_regs(target->thread.vxrs);
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
+	if (rc == 0 && target == current)
+		restore_vx_regs(target->thread.vxrs);
+
+	return rc;
+}
+
 #endif
 
 static int s390_system_call_get(struct task_struct *target,
@@ -1041,7 +1152,7 @@ static int s390_system_call_set(struct task_struct *target,
 }
 
 static const struct user_regset s390_regsets[] = {
-	[REGSET_GENERAL] = {
+	{
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(s390_regs) / sizeof(long),
 		.size = sizeof(long),
@@ -1049,7 +1160,7 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_regs_get,
 		.set = s390_regs_set,
 	},
-	[REGSET_FP] = {
+	{
 		.core_note_type = NT_PRFPREG,
 		.n = sizeof(s390_fp_regs) / sizeof(long),
 		.size = sizeof(long),
@@ -1057,8 +1168,16 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
+	{
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(unsigned int),
+		.align = sizeof(unsigned int),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
 #ifdef CONFIG_64BIT
-	[REGSET_LAST_BREAK] = {
+	{
 		.core_note_type = NT_S390_LAST_BREAK,
 		.n = 1,
 		.size = sizeof(long),
@@ -1066,7 +1185,7 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_last_break_get,
 		.set = s390_last_break_set,
 	},
-	[REGSET_TDB] = {
+	{
 		.core_note_type = NT_S390_TDB,
 		.n = 1,
 		.size = 256,
@@ -1074,15 +1193,25 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_tdb_get,
 		.set = s390_tdb_set,
 	},
-#endif
-	[REGSET_SYSTEM_CALL] = {
-		.core_note_type = NT_S390_SYSTEM_CALL,
-		.n = 1,
-		.size = sizeof(unsigned int),
-		.align = sizeof(unsigned int),
-		.get = s390_system_call_get,
-		.set = s390_system_call_set,
+	{
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.active = s390_vxrs_active,
+		.get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.active = s390_vxrs_active,
+		.get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
 	},
+#endif
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -1247,7 +1376,7 @@ static int s390_compat_last_break_set(struct task_struct *target,
 }
 
 static const struct user_regset s390_compat_regsets[] = {
-	[REGSET_GENERAL] = {
+	{
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
@@ -1255,7 +1384,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_compat_regs_get,
 		.set = s390_compat_regs_set,
 	},
-	[REGSET_FP] = {
+	{
 		.core_note_type = NT_PRFPREG,
 		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
@@ -1263,7 +1392,15 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
-	[REGSET_LAST_BREAK] = {
+	{
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(compat_uint_t),
+		.align = sizeof(compat_uint_t),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
+	{
 		.core_note_type = NT_S390_LAST_BREAK,
 		.n = 1,
 		.size = sizeof(long),
@@ -1271,7 +1408,7 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_compat_last_break_get,
 		.set = s390_compat_last_break_set,
 	},
-	[REGSET_TDB] = {
+	{
 		.core_note_type = NT_S390_TDB,
 		.n = 1,
 		.size = 256,
@@ -1279,15 +1416,25 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_tdb_get,
 		.set = s390_tdb_set,
 	},
-	[REGSET_SYSTEM_CALL] = {
-		.core_note_type = NT_S390_SYSTEM_CALL,
-		.n = 1,
-		.size = sizeof(compat_uint_t),
-		.align = sizeof(compat_uint_t),
-		.get = s390_system_call_get,
-		.set = s390_system_call_set,
+	{
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.active = s390_vxrs_active,
+		.get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.active = s390_vxrs_active,
+		.get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
 	},
-	[REGSET_GENERAL_EXTENDED] = {
+	{
 		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index cdfc060dd319..e80d9ff9a56d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -343,6 +343,9 @@ static void __init setup_lowcore(void)
 		__ctl_set_bit(14, 29);
 	}
 #else
+	if (MACHINE_HAS_VX)
+		lc->vector_save_area_addr =
+			(unsigned long) &lc->vector_save_area;
 	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
 #endif
 	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
@@ -765,6 +768,12 @@ static void __init setup_hwcaps(void)
 	 */
 	if (test_facility(50) && test_facility(73))
 		elf_hwcap |= HWCAP_S390_TE;
+
+	/*
+	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 */
+	if (test_facility(129))
+		elf_hwcap |= HWCAP_S390_VXRS;
 #endif
 
 	get_cpu_id(&cpu_id);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 469c4c6d9182..0c1a0ff0a558 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -31,30 +31,117 @@
 #include <asm/switch_to.h>
 #include "entry.h"
 
-typedef struct 
+/*
+ * Layout of an old-style signal-frame:
+ *	-----------------------------------------
+ *	| save area (_SIGNAL_FRAMESIZE)		|
+ *	-----------------------------------------
+ *	| struct sigcontext			|
+ *	|	oldmask				|
+ *	|	_sigregs *			|
+ *	-----------------------------------------
+ *	| _sigregs with				|
+ *	|	_s390_regs_common		|
+ *	|	_s390_fp_regs			|
+ *	-----------------------------------------
+ *	| int signo				|
+ *	-----------------------------------------
+ *	| _sigregs_ext with			|
+ *	|	gprs_high 64 byte (opt)		|
+ *	|	vxrs_low 128 byte (opt)		|
+ *	|	vxrs_high 256 byte (opt)	|
+ *	|	reserved 128 byte (opt)		|
+ *	-----------------------------------------
+ *	| __u16 svc_insn			|
+ *	-----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
 {
 	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
 	struct sigcontext sc;
 	_sigregs sregs;
 	int signo;
-	__u8 retcode[S390_SYSCALL_SIZE];
-} sigframe;
+	_sigregs_ext sregs_ext;
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
+};
 
-typedef struct 
+/*
+ * Layout of an rt signal-frame:
+ *	-----------------------------------------
+ *	| save area (_SIGNAL_FRAMESIZE)		|
+ *	-----------------------------------------
+ *	| svc __NR_rt_sigreturn 2 byte		|
+ *	-----------------------------------------
+ *	| struct siginfo			|
+ *	-----------------------------------------
+ *	| struct ucontext_extended with		|
+ *	|	unsigned long uc_flags		|
+ *	|	struct ucontext *uc_link	|
+ *	|	stack_t uc_stack		|
+ *	|	_sigregs uc_mcontext with	|
+ *	|		_s390_regs_common	|
+ *	|		_s390_fp_regs		|
+ *	|	sigset_t uc_sigmask		|
+ *	|	_sigregs_ext uc_mcontext_ext	|
+ *	|		gprs_high 64 byte (opt)	|
+ *	|		vxrs_low 128 byte (opt)	|
+ *	|		vxrs_high 256 byte (opt)|
+ *	|		reserved 128 byte (opt)	|
+ *	-----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
 {
 	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
-	__u8 retcode[S390_SYSCALL_SIZE];
+	__u16 svc_insn;
 	struct siginfo info;
-	struct ucontext uc;
-} rt_sigframe;
+	struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+	save_access_regs(current->thread.acrs);
+	save_fp_ctl(&current->thread.fp_regs.fpc);
+#ifdef CONFIG_64BIT
+	if (current->thread.vxrs) {
+		int i;
+
+		save_vx_regs(current->thread.vxrs);
+		for (i = 0; i < __NUM_FPRS; i++)
+			current->thread.fp_regs.fprs[i] =
+				*(freg_t *)(current->thread.vxrs + i);
+	} else
+#endif
+		save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+	restore_access_regs(current->thread.acrs);
+	/* restore_fp_ctl is done in restore_sigregs */
+#ifdef CONFIG_64BIT
+	if (current->thread.vxrs) {
+		int i;
+
+		for (i = 0; i < __NUM_FPRS; i++)
+			*(freg_t *)(current->thread.vxrs + i) =
+				current->thread.fp_regs.fprs[i];
+		restore_vx_regs(current->thread.vxrs);
+	} else
+#endif
+		restore_fp_regs(current->thread.fp_regs.fprs);
+}
 
 /* Returns non-zero on fault. */
 static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 {
 	_sigregs user_sregs;
 
-	save_access_regs(current->thread.acrs);
-
 	/* Copy a 'clean' PSW mask to the user to avoid leaking
 	   information about whether PER is currently on.  */
 	user_sregs.regs.psw.mask = PSW_USER_BITS |
@@ -63,12 +150,6 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
 	       sizeof(user_sregs.regs.acrs));
-	/* 
-	 * We have to store the fp registers to current->thread.fp_regs
-	 * to merge them with the emulated registers.
-	 */
-	save_fp_ctl(&current->thread.fp_regs.fpc);
-	save_fp_regs(current->thread.fp_regs.fprs);
 	memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
 	       sizeof(user_sregs.fpregs));
 	if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
@@ -107,20 +188,64 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(current->thread.acrs));
-	restore_access_regs(current->thread.acrs);
 
 	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
 	       sizeof(current->thread.fp_regs));
 
-	restore_fp_regs(current->thread.fp_regs.fprs);
 	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
 }
 
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+			    _sigregs_ext __user *sregs_ext)
+{
+#ifdef CONFIG_64BIT
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Save vector registers to signal stack */
+	if (current->thread.vxrs) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
+#endif
+	return 0;
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+			       _sigregs_ext __user *sregs_ext)
+{
+#ifdef CONFIG_64BIT
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Restore vector registers from signal stack */
+	if (current->thread.vxrs) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+	}
+#endif
+	return 0;
+}
+
 SYSCALL_DEFINE0(sigreturn)
 {
 	struct pt_regs *regs = task_pt_regs(current);
-	sigframe __user *frame = (sigframe __user *)regs->gprs[15];
+	struct sigframe __user *frame =
+		(struct sigframe __user *) regs->gprs[15];
 	sigset_t set;
 
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
@@ -128,6 +253,9 @@ SYSCALL_DEFINE0(sigreturn)
 	set_current_blocked(&set);
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
+	if (restore_sigregs_ext(regs, &frame->sregs_ext))
+		goto badframe;
+	load_sigregs();
 	return regs->gprs[2];
 badframe:
 	force_sig(SIGSEGV, current);
@@ -137,27 +265,26 @@ badframe:
 SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct pt_regs *regs = task_pt_regs(current);
-	rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15];
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *)regs->gprs[15];
 	sigset_t set;
 
 	if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 	set_current_blocked(&set);
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
-	if (restore_altstack(&frame->uc.uc_stack))
+	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
 		goto badframe;
+	load_sigregs();
 	return regs->gprs[2];
 badframe:
 	force_sig(SIGSEGV, current);
 	return 0;
 }
 
-/*
- * Set up a signal frame.
- */
-
-
 /*
  * Determine which stack to use..
  */
@@ -195,39 +322,63 @@ static inline int map_signal(int sig)
 static int setup_frame(int sig, struct k_sigaction *ka,
 		       sigset_t *set, struct pt_regs * regs)
 {
-	sigframe __user *frame;
-
-	frame = get_sigframe(ka, regs, sizeof(sigframe));
+	struct sigframe __user *frame;
+	struct sigcontext sc;
+	unsigned long restorer;
+	size_t frame_size;
 
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+	if (MACHINE_HAS_VX)
+		frame_size += sizeof(frame->sregs_ext);
+	frame = get_sigframe(ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
 
-	if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
 		return -EFAULT;
 
+	/* Create struct sigcontext on the signal stack */
+	memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+	sc.sregs = (_sigregs __user __force *) &frame->sregs;
+	if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create _sigregs on the signal stack */
 	if (save_sigregs(regs, &frame->sregs))
 		return -EFAULT;
-	if (__put_user(&frame->sregs, &frame->sc.sregs))
+
+	/* Place signal number on stack to allow backtrace from handler.  */
+	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+		return -EFAULT;
+
+	/* Create _sigregs_ext on the signal stack */
+	if (save_sigregs_ext(regs, &frame->sregs_ext))
 		return -EFAULT;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-                regs->gprs[14] = (unsigned long)
-			ka->sa.sa_restorer | PSW_ADDR_AMODE;
+		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
 	} else {
-                regs->gprs[14] = (unsigned long)
-			frame->retcode | PSW_ADDR_AMODE;
-		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
-	                       (u16 __user *)(frame->retcode)))
+		/* Signal frame without vector registers are short ! */
+		__u16 __user *svc = (void *) frame + frame_size - 2;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
 			return -EFAULT;
+		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
 	}
 
-	/* Set up backchain. */
-	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
-		return -EFAULT;
-
 	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
 	regs->gprs[15] = (unsigned long) frame;
 	/* Force default amode and default user address space control. */
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
@@ -247,54 +398,69 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 		regs->gprs[5] = regs->int_parm_long;
 		regs->gprs[6] = task_thread_info(current)->last_break;
 	}
-
-	/* Place signal number on stack to allow backtrace from handler.  */
-	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
-		return -EFAULT;
 	return 0;
 }
 
 static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
 			  struct pt_regs *regs)
 {
-	int err = 0;
-	rt_sigframe __user *frame;
-
-	frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe));
+	struct rt_sigframe __user *frame;
+	unsigned long uc_flags, restorer;
+	size_t frame_size;
 
+	frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	uc_flags = 0;
+#ifdef CONFIG_64BIT
+	if (MACHINE_HAS_VX) {
+		frame_size += sizeof(_sigregs_ext);
+		if (current->thread.vxrs)
+			uc_flags |= UC_VXRS;
+	}
+#endif
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
 		return -EFAULT;
 
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		return -EFAULT;
-
-	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(NULL, &frame->uc.uc_link);
-	err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
-	err |= save_sigregs(regs, &frame->uc.uc_mcontext);
-	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err)
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
 		return -EFAULT;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
-                regs->gprs[14] = (unsigned long)
+		restorer = (unsigned long)
 			ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
 	} else {
-                regs->gprs[14] = (unsigned long)
-			frame->retcode | PSW_ADDR_AMODE;
-		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-			       (u16 __user *)(frame->retcode)))
+		__u16 __user *svc = &frame->svc_insn;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
 			return -EFAULT;
+		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
 	}
 
-	/* Set up backchain. */
-	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(NULL, &frame->uc.uc_link) ||
+	    __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs(regs, &frame->uc.uc_mcontext) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+	    save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
 		return -EFAULT;
 
 	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
 	regs->gprs[15] = (unsigned long) frame;
 	/* Force default amode and default user address space control. */
 	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index bba0e2469254..13cae5b152d7 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -179,6 +179,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 			goto out;
 	}
 #else
+	if (MACHINE_HAS_VX)
+		lc->vector_save_area_addr =
+			(unsigned long) &lc->vector_save_area;
 	if (vdso_alloc_per_cpu(lc))
 		goto out;
 #endif
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index e3e06a4fdfce..9ff5ecba26ab 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -18,6 +18,8 @@
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/switch_to.h>
 #include "entry.h"
 
 int show_unhandled_signals = 1;
@@ -303,6 +305,74 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 	      "specification exception");
 #endif
 
+#ifdef CONFIG_64BIT
+int alloc_vector_registers(struct task_struct *tsk)
+{
+	__vector128 *vxrs;
+	int i;
+
+	/* Allocate vector register save area. */
+	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
+		       GFP_KERNEL|__GFP_REPEAT);
+	if (!vxrs)
+		return -ENOMEM;
+	preempt_disable();
+	if (tsk == current)
+		save_fp_regs(tsk->thread.fp_regs.fprs);
+	/* Copy the 16 floating point registers */
+	for (i = 0; i < 16; i++)
+		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
+	tsk->thread.vxrs = vxrs;
+	if (tsk == current) {
+		__ctl_set_bit(0, 17);
+		restore_vx_regs(vxrs);
+	}
+	preempt_enable();
+	return 0;
+}
+
+void vector_exception(struct pt_regs *regs)
+{
+	int si_code, vic;
+
+	if (!MACHINE_HAS_VX) {
+		do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+		return;
+	}
+
+	/* get vector interrupt code from fpc */
+	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	switch (vic) {
+	case 1: /* invalid vector operation */
+		si_code = FPE_FLTINV;
+		break;
+	case 2: /* division by zero */
+		si_code = FPE_FLTDIV;
+		break;
+	case 3: /* overflow */
+		si_code = FPE_FLTOVF;
+		break;
+	case 4: /* underflow */
+		si_code = FPE_FLTUND;
+		break;
+	case 5:	/* inexact */
+		si_code = FPE_FLTRES;
+		break;
+	default: /* unknown cause */
+		si_code = 0;
+	}
+	do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+	return 1;
+}
+__setup("novx", disable_vector_extension);
+#endif
+
 void data_exception(struct pt_regs *regs)
 {
 	__u16 __user *location;
@@ -368,6 +438,18 @@ void data_exception(struct pt_regs *regs)
                 }
         }
 #endif 
+#ifdef CONFIG_64BIT
+	/* Check for vector register enablement */
+	if (MACHINE_HAS_VX && !current->thread.vxrs &&
+	    (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+		alloc_vector_registers(current);
+		/* Vector data exception is suppressing, rewind psw. */
+		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+		clear_pt_regs_flag(regs, PIF_PER_TRAP);
+		return;
+	}
+#endif
+
 	if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index ef6103bf1f9b..ea9bf2561b9e 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -391,6 +391,8 @@ typedef struct elf64_shdr {
 #define NT_S390_LAST_BREAK	0x306	/* s390 breaking event address */
 #define NT_S390_SYSTEM_CALL	0x307	/* s390 system call restart data */
 #define NT_S390_TDB	0x308		/* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW	0x309	/* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH	0x30a	/* s390 vector registers 16-31 */
 #define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
 #define NT_ARM_TLS	0x401		/* ARM TLS register */
 #define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
-- 
cgit v1.2.3


From f606b77f1a9e362451aca8f81d8f36a3a112139e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 9 Oct 2014 15:27:37 -0700
Subject: prctl: PR_SET_MM -- introduce PR_SET_MM_MAP operation

During development of c/r we've noticed that in case if we need to support
user namespaces we face a problem with capabilities in prctl(PR_SET_MM,
...) call, in particular once new user namespace is created
capable(CAP_SYS_RESOURCE) no longer passes.

A approach is to eliminate CAP_SYS_RESOURCE check but pass all new values
in one bundle, which would allow the kernel to make more intensive test
for sanity of values and same time allow us to support checkpoint/restore
of user namespaces.

Thus a new command PR_SET_MM_MAP introduced. It takes a pointer of
prctl_mm_map structure which carries all the members to be updated.

	prctl(PR_SET_MM, PR_SET_MM_MAP, struct prctl_mm_map *, size)

	struct prctl_mm_map {
		__u64	start_code;
		__u64	end_code;
		__u64	start_data;
		__u64	end_data;
		__u64	start_brk;
		__u64	brk;
		__u64	start_stack;
		__u64	arg_start;
		__u64	arg_end;
		__u64	env_start;
		__u64	env_end;
		__u64	*auxv;
		__u32	auxv_size;
		__u32	exe_fd;
	};

All members except @exe_fd correspond ones of struct mm_struct.  To figure
out which available values these members may take here are meanings of the
members.

 - start_code, end_code: represent bounds of executable code area
 - start_data, end_data: represent bounds of data area
 - start_brk, brk: used to calculate bounds for brk() syscall
 - start_stack: used when accounting space needed for command
   line arguments, environment and shmat() syscall
 - arg_start, arg_end, env_start, env_end: represent memory area
   supplied for command line arguments and environment variables
 - auxv, auxv_size: carries auxiliary vector, Elf format specifics
 - exe_fd: file descriptor number for executable link (/proc/self/exe)

Thus we apply the following requirements to the values

1) Any member except @auxv, @auxv_size, @exe_fd is rather an address
   in user space thus it must be laying inside [mmap_min_addr, mmap_max_addr)
   interval.

2) While @[start|end]_code and @[start|end]_data may point to an nonexisting
   VMAs (say a program maps own new .text and .data segments during execution)
   the rest of members should belong to VMA which must exist.

3) Addresses must be ordered, ie @start_ member must not be greater or
   equal to appropriate @end_ member.

4) As in regular Elf loading procedure we require that @start_brk and
   @brk be greater than @end_data.

5) If RLIMIT_DATA rlimit is set to non-infinity new values should not
   exceed existing limit. Same applies to RLIMIT_STACK.

6) Auxiliary vector size must not exceed existing one (which is
   predefined as AT_VECTOR_SIZE and depends on architecture).

7) File descriptor passed in @exe_file should be pointing
   to executable file (because we use existing prctl_set_mm_exe_file_locked
   helper it ensures that the file we are going to use as exe link has all
   required permission granted).

Now about where these members are involved inside kernel code:

 - @start_code and @end_code are used in /proc/$pid/[stat|statm] output;

 - @start_data and @end_data are used in /proc/$pid/[stat|statm] output,
   also they are considered if there enough space for brk() syscall
   result if RLIMIT_DATA is set;

 - @start_brk shown in /proc/$pid/stat output and accounted in brk()
   syscall if RLIMIT_DATA is set; also this member is tested to
   find a symbolic name of mmap event for perf system (we choose
   if event is generated for "heap" area); one more aplication is
   selinux -- we test if a process has PROCESS__EXECHEAP permission
   if trying to make heap area being executable with mprotect() syscall;

 - @brk is a current value for brk() syscall which lays inside heap
   area, it's shown in /proc/$pid/stat. When syscall brk() succesfully
   provides new memory area to a user space upon brk() completion the
   mm::brk is updated to carry new value;

   Both @start_brk and @brk are actively used in /proc/$pid/maps
   and /proc/$pid/smaps output to find a symbolic name "heap" for
   VMA being scanned;

 - @start_stack is printed out in /proc/$pid/stat and used to
   find a symbolic name "stack" for task and threads in
   /proc/$pid/maps and /proc/$pid/smaps output, and as the same
   as with @start_brk -- perf system uses it for event naming.
   Also kernel treat this member as a start address of where
   to map vDSO pages and to check if there is enough space
   for shmat() syscall;

 - @arg_start, @arg_end, @env_start and @env_end are printed out
   in /proc/$pid/stat. Another access to the data these members
   represent is to read /proc/$pid/environ or /proc/$pid/cmdline.
   Any attempt to read these areas kernel tests with access_process_vm
   helper so a user must have enough rights for this action;

 - @auxv and @auxv_size may be read from /proc/$pid/auxv. Strictly
   speaking kernel doesn't care much about which exactly data is
   sitting there because it is solely for userspace;

 - @exe_fd is referred from /proc/$pid/exe and when generating
   coredump. We uses prctl_set_mm_exe_file_locked helper to update
   this member, so exe-file link modification remains one-shot
   action.

Still note that updating exe-file link now doesn't require sys-resource
capability anymore, after all there is no much profit in preventing setup
own file link (there are a number of ways to execute own code -- ptrace,
ld-preload, so that the only reliable way to find which exactly code is
executed is to inspect running program memory).  Still we require the
caller to be at least user-namespace root user.

I believe the old interface should be deprecated and ripped off in a
couple of kernel releases if no one against.

To test if new interface is implemented in the kernel one can pass
PR_SET_MM_MAP_SIZE opcode and the kernel returns the size of currently
supported struct prctl_mm_map.

[akpm@linux-foundation.org: fix 80-col wordwrap in macro definitions]
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Andrew Vagin <avagin@openvz.org>
Tested-by: Andrew Vagin <avagin@openvz.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Julien Tinnes <jln@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/prctl.h |  27 +++++++
 kernel/sys.c               | 190 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 58afc04c107e..513df75d0fc9 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_PRCTL_H
 #define _LINUX_PRCTL_H
 
+#include <linux/types.h>
+
 /* Values to pass as first argument to prctl() */
 
 #define PR_SET_PDEATHSIG  1  /* Second arg is a signal */
@@ -119,6 +121,31 @@
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
 # define PR_SET_MM_EXE_FILE		13
+# define PR_SET_MM_MAP			14
+# define PR_SET_MM_MAP_SIZE		15
+
+/*
+ * This structure provides new memory descriptor
+ * map which mostly modifies /proc/pid/stat[m]
+ * output for a task. This mostly done in a
+ * sake of checkpoint/restore functionality.
+ */
+struct prctl_mm_map {
+	__u64	start_code;		/* code section bounds */
+	__u64	end_code;
+	__u64	start_data;		/* data section bounds */
+	__u64	end_data;
+	__u64	start_brk;		/* heap for brk() syscall */
+	__u64	brk;
+	__u64	start_stack;		/* stack starts at */
+	__u64	arg_start;		/* command line arguments bounds */
+	__u64	arg_end;
+	__u64	env_start;		/* environment variables bounds */
+	__u64	env_end;
+	__u64	*auxv;			/* auxiliary vector */
+	__u32	auxv_size;		/* vector size */
+	__u32	exe_fd;			/* /proc/$pid/exe link file */
+};
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
diff --git a/kernel/sys.c b/kernel/sys.c
index 14222a1699c0..f7030b060018 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1687,6 +1687,187 @@ exit:
 	return err;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/*
+ * WARNING: we don't require any capability here so be very careful
+ * in what is allowed for modification from userspace.
+ */
+static int validate_prctl_map(struct prctl_mm_map *prctl_map)
+{
+	unsigned long mmap_max_addr = TASK_SIZE;
+	struct mm_struct *mm = current->mm;
+	int error = -EINVAL, i;
+
+	static const unsigned char offsets[] = {
+		offsetof(struct prctl_mm_map, start_code),
+		offsetof(struct prctl_mm_map, end_code),
+		offsetof(struct prctl_mm_map, start_data),
+		offsetof(struct prctl_mm_map, end_data),
+		offsetof(struct prctl_mm_map, start_brk),
+		offsetof(struct prctl_mm_map, brk),
+		offsetof(struct prctl_mm_map, start_stack),
+		offsetof(struct prctl_mm_map, arg_start),
+		offsetof(struct prctl_mm_map, arg_end),
+		offsetof(struct prctl_mm_map, env_start),
+		offsetof(struct prctl_mm_map, env_end),
+	};
+
+	/*
+	 * Make sure the members are not somewhere outside
+	 * of allowed address space.
+	 */
+	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+		u64 val = *(u64 *)((char *)prctl_map + offsets[i]);
+
+		if ((unsigned long)val >= mmap_max_addr ||
+		    (unsigned long)val < mmap_min_addr)
+			goto out;
+	}
+
+	/*
+	 * Make sure the pairs are ordered.
+	 */
+#define __prctl_check_order(__m1, __op, __m2)				\
+	((unsigned long)prctl_map->__m1 __op				\
+	 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
+	error  = __prctl_check_order(start_code, <, end_code);
+	error |= __prctl_check_order(start_data, <, end_data);
+	error |= __prctl_check_order(start_brk, <=, brk);
+	error |= __prctl_check_order(arg_start, <=, arg_end);
+	error |= __prctl_check_order(env_start, <=, env_end);
+	if (error)
+		goto out;
+#undef __prctl_check_order
+
+	error = -EINVAL;
+
+	/*
+	 * @brk should be after @end_data in traditional maps.
+	 */
+	if (prctl_map->start_brk <= prctl_map->end_data ||
+	    prctl_map->brk <= prctl_map->end_data)
+		goto out;
+
+	/*
+	 * Neither we should allow to override limits if they set.
+	 */
+	if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
+			      prctl_map->start_brk, prctl_map->end_data,
+			      prctl_map->start_data))
+			goto out;
+
+	/*
+	 * Someone is trying to cheat the auxv vector.
+	 */
+	if (prctl_map->auxv_size) {
+		if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
+			goto out;
+	}
+
+	/*
+	 * Finally, make sure the caller has the rights to
+	 * change /proc/pid/exe link: only local root should
+	 * be allowed to.
+	 */
+	if (prctl_map->exe_fd != (u32)-1) {
+		struct user_namespace *ns = current_user_ns();
+		const struct cred *cred = current_cred();
+
+		if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
+		    !gid_eq(cred->gid, make_kgid(ns, 0)))
+			goto out;
+	}
+
+	error = 0;
+out:
+	return error;
+}
+
+static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
+{
+	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
+	unsigned long user_auxv[AT_VECTOR_SIZE];
+	struct mm_struct *mm = current->mm;
+	int error;
+
+	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);
+
+	if (opt == PR_SET_MM_MAP_SIZE)
+		return put_user((unsigned int)sizeof(prctl_map),
+				(unsigned int __user *)addr);
+
+	if (data_size != sizeof(prctl_map))
+		return -EINVAL;
+
+	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
+		return -EFAULT;
+
+	error = validate_prctl_map(&prctl_map);
+	if (error)
+		return error;
+
+	if (prctl_map.auxv_size) {
+		memset(user_auxv, 0, sizeof(user_auxv));
+		if (copy_from_user(user_auxv,
+				   (const void __user *)prctl_map.auxv,
+				   prctl_map.auxv_size))
+			return -EFAULT;
+
+		/* Last entry must be AT_NULL as specification requires */
+		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
+		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
+	}
+
+	down_write(&mm->mmap_sem);
+	if (prctl_map.exe_fd != (u32)-1)
+		error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd);
+	downgrade_write(&mm->mmap_sem);
+	if (error)
+		goto out;
+
+	/*
+	 * We don't validate if these members are pointing to
+	 * real present VMAs because application may have correspond
+	 * VMAs already unmapped and kernel uses these members for statistics
+	 * output in procfs mostly, except
+	 *
+	 *  - @start_brk/@brk which are used in do_brk but kernel lookups
+	 *    for VMAs when updating these memvers so anything wrong written
+	 *    here cause kernel to swear at userspace program but won't lead
+	 *    to any problem in kernel itself
+	 */
+
+	mm->start_code	= prctl_map.start_code;
+	mm->end_code	= prctl_map.end_code;
+	mm->start_data	= prctl_map.start_data;
+	mm->end_data	= prctl_map.end_data;
+	mm->start_brk	= prctl_map.start_brk;
+	mm->brk		= prctl_map.brk;
+	mm->start_stack	= prctl_map.start_stack;
+	mm->arg_start	= prctl_map.arg_start;
+	mm->arg_end	= prctl_map.arg_end;
+	mm->env_start	= prctl_map.env_start;
+	mm->env_end	= prctl_map.env_end;
+
+	/*
+	 * Note this update of @saved_auxv is lockless thus
+	 * if someone reads this member in procfs while we're
+	 * updating -- it may get partly updated results. It's
+	 * known and acceptable trade off: we leave it as is to
+	 * not introduce additional locks here making the kernel
+	 * more complex.
+	 */
+	if (prctl_map.auxv_size)
+		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
+
+	error = 0;
+out:
+	up_read(&mm->mmap_sem);
+	return error;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
@@ -1694,9 +1875,16 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	struct vm_area_struct *vma;
 	int error;
 
-	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
+	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
+			      opt != PR_SET_MM_MAP &&
+			      opt != PR_SET_MM_MAP_SIZE)))
 		return -EINVAL;
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
+		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
+#endif
+
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
 
-- 
cgit v1.2.3


From 09316c09dde33aae14f34489d9e3d243ec0d5938 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Date: Thu, 9 Oct 2014 15:29:32 -0700
Subject: mm/balloon_compaction: add vmstat counters and kpageflags bit

Always mark pages with PageBalloon even if balloon compaction is disabled
and expose this mark in /proc/kpageflags as KPF_BALLOON.

Also this patch adds three counters into /proc/vmstat: "balloon_inflate",
"balloon_deflate" and "balloon_migrate".  They accumulate balloon
activity.  Current size of balloon is (balloon_inflate - balloon_deflate)
pages.

All generic balloon code now gathered under option CONFIG_MEMORY_BALLOON.
It should be selected by ballooning driver which wants use this feature.
Currently virtio-balloon is the only user.

Signed-off-by: Konstantin Khlebnikov <k.khlebnikov@samsung.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/virtio/Kconfig                 |  1 +
 drivers/virtio/virtio_balloon.c        |  1 +
 fs/proc/page.c                         |  3 +++
 include/linux/balloon_compaction.h     |  2 ++
 include/linux/vm_event_item.h          |  7 +++++++
 include/uapi/linux/kernel-page-flags.h |  1 +
 mm/Kconfig                             |  7 ++++++-
 mm/Makefile                            |  3 ++-
 mm/balloon_compaction.c                |  2 ++
 mm/vmstat.c                            | 12 +++++++++++-
 tools/vm/page-types.c                  |  1 +
 11 files changed, 37 insertions(+), 3 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index c6683f2e396c..00b228638274 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -25,6 +25,7 @@ config VIRTIO_PCI
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver"
 	depends on VIRTIO
+	select MEMORY_BALLOON
 	---help---
 	 This driver supports increasing and decreasing the amount
 	 of memory within a KVM guest.
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 2bad7f9dd2ac..f893148a107b 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -396,6 +396,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_insert(vb_dev_info, newpage);
 	vb_dev_info->isolated_pages--;
+	__count_vm_event(BALLOON_MIGRATE);
 	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb->pfns, newpage);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index e647c55275d9..1e3187da1fed 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -133,6 +133,9 @@ u64 stable_page_flags(struct page *page)
 	if (PageBuddy(page))
 		u |= 1 << KPF_BUDDY;
 
+	if (PageBalloon(page))
+		u |= 1 << KPF_BALLOON;
+
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index bc3d2985cc9a..9b0a15d06a4f 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -166,11 +166,13 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
+	__SetPageBalloon(page);
 	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	list_del(&page->lru);
 }
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index ced92345c963..730334cdf037 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -72,6 +72,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		THP_ZERO_PAGE_ALLOC,
 		THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+		BALLOON_INFLATE,
+		BALLOON_DEFLATE,
+#ifdef CONFIG_BALLOON_COMPACTION
+		BALLOON_MIGRATE,
+#endif
+#endif
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 		NR_TLB_REMOTE_FLUSH,	/* cpu tried to flush others' tlbs */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 5116a0e48172..2f96d233c980 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -31,6 +31,7 @@
 
 #define KPF_KSM			21
 #define KPF_THP			22
+#define KPF_BALLOON		23
 
 
 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 0ceb8a567dab..1d1ae6b078fd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -230,12 +230,17 @@ config SPLIT_PTLOCK_CPUS
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	boolean
 
+#
+# support for memory balloon
+config MEMORY_BALLOON
+	boolean
+
 #
 # support for memory balloon compaction
 config BALLOON_COMPACTION
 	bool "Allow for balloon memory compaction/migration"
 	def_bool y
-	depends on COMPACTION && VIRTIO_BALLOON
+	depends on COMPACTION && MEMORY_BALLOON
 	help
 	  Memory fragmentation introduced by ballooning might reduce
 	  significantly the number of 2MB contiguous memory blocks that can be
diff --git a/mm/Makefile b/mm/Makefile
index f8ed7ab417b1..1f534a7f0a71 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o balloon_compaction.o vmacache.o \
+			   compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o \
 			   iov_iter.o debug.o $(mmu-y)
 
@@ -67,3 +67,4 @@ obj-$(CONFIG_ZBUD)	+= zbud.o
 obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
 obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
 obj-$(CONFIG_CMA)	+= cma.o
+obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 3afdabdbc0a4..b3cbe19f71b5 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -36,6 +36,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 	BUG_ON(!trylock_page(page));
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 	balloon_page_insert(b_dev_info, page);
+	__count_vm_event(BALLOON_INFLATE);
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 	unlock_page(page);
 	return page;
@@ -74,6 +75,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 			}
 			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 			balloon_page_delete(page);
+			__count_vm_event(BALLOON_DEFLATE);
 			spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 			unlock_page(page);
 			dequeued_page = true;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e9ab104b956f..cce7c766da7a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -735,7 +735,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 					TEXT_FOR_HIGHMEM(xx) xx "_movable",
 
 const char * const vmstat_text[] = {
-	/* Zoned VM counters */
+	/* enum zone_stat_item countes */
 	"nr_free_pages",
 	"nr_alloc_batch",
 	"nr_inactive_anon",
@@ -778,10 +778,13 @@ const char * const vmstat_text[] = {
 	"workingset_nodereclaim",
 	"nr_anon_transparent_hugepages",
 	"nr_free_cma",
+
+	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
 	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
+	/* enum vm_event_item counters */
 	"pgpgin",
 	"pgpgout",
 	"pswpin",
@@ -860,6 +863,13 @@ const char * const vmstat_text[] = {
 	"thp_zero_page_alloc",
 	"thp_zero_page_alloc_failed",
 #endif
+#ifdef CONFIG_MEMORY_BALLOON
+	"balloon_inflate",
+	"balloon_deflate",
+#ifdef CONFIG_BALLOON_COMPACTION
+	"balloon_migrate",
+#endif
+#endif /* CONFIG_MEMORY_BALLOON */
 #ifdef CONFIG_DEBUG_TLBFLUSH
 #ifdef CONFIG_SMP
 	"nr_tlb_remote_flush",
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index c4d6d2e20e0d..264fbc297e0b 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -132,6 +132,7 @@ static const char * const page_flag_names[] = {
 	[KPF_NOPAGE]		= "n:nopage",
 	[KPF_KSM]		= "x:ksm",
 	[KPF_THP]		= "t:thp",
+	[KPF_BALLOON]		= "o:balloon",
 
 	[KPF_RESERVED]		= "r:reserved",
 	[KPF_MLOCKED]		= "m:mlocked",
-- 
cgit v1.2.3


From 2cbbca5e7c38d0c776497f586688464f8cfb1583 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 30 Sep 2014 16:02:19 +1000
Subject: md: discard PRINT_RAID_DEBUG ioctl

All the interesting information printed by this ioctl
is provided in /proc/mdstat and/or sysfs.
So it isn't needed and isn't used and would be best if it didn't
exist.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c                | 138 -----------------------------------------
 include/uapi/linux/raid/md_u.h |   1 -
 2 files changed, 139 deletions(-)

(limited to 'include/uapi')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9c33f052c848..fdd2d3fe1c6c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -66,8 +66,6 @@ static void autostart_arrays(int part);
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
-static void md_print_devices(void);
-
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
@@ -2171,136 +2169,6 @@ static void export_array(struct mddev *mddev)
 	mddev->major_version = 0;
 }
 
-static void print_desc(mdp_disk_t *desc)
-{
-	printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
-		desc->major,desc->minor,desc->raid_disk,desc->state);
-}
-
-static void print_sb_90(mdp_super_t *sb)
-{
-	int i;
-
-	printk(KERN_INFO
-		"md:  SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
-		sb->major_version, sb->minor_version, sb->patch_version,
-		sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
-		sb->ctime);
-	printk(KERN_INFO "md:     L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n",
-		sb->level, sb->size, sb->nr_disks, sb->raid_disks,
-		sb->md_minor, sb->layout, sb->chunk_size);
-	printk(KERN_INFO "md:     UT:%08x ST:%d AD:%d WD:%d"
-		" FD:%d SD:%d CSUM:%08x E:%08lx\n",
-		sb->utime, sb->state, sb->active_disks, sb->working_disks,
-		sb->failed_disks, sb->spare_disks,
-		sb->sb_csum, (unsigned long)sb->events_lo);
-
-	printk(KERN_INFO);
-	for (i = 0; i < MD_SB_DISKS; i++) {
-		mdp_disk_t *desc;
-
-		desc = sb->disks + i;
-		if (desc->number || desc->major || desc->minor ||
-		    desc->raid_disk || (desc->state && (desc->state != 4))) {
-			printk("     D %2d: ", i);
-			print_desc(desc);
-		}
-	}
-	printk(KERN_INFO "md:     THIS: ");
-	print_desc(&sb->this_disk);
-}
-
-static void print_sb_1(struct mdp_superblock_1 *sb)
-{
-	__u8 *uuid;
-
-	uuid = sb->set_uuid;
-	printk(KERN_INFO
-	       "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
-	       "md:    Name: \"%s\" CT:%llu\n",
-		le32_to_cpu(sb->major_version),
-		le32_to_cpu(sb->feature_map),
-		uuid,
-		sb->set_name,
-		(unsigned long long)le64_to_cpu(sb->ctime)
-		       & MD_SUPERBLOCK_1_TIME_SEC_MASK);
-
-	uuid = sb->device_uuid;
-	printk(KERN_INFO
-	       "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
-			" RO:%llu\n"
-	       "md:     Dev:%08x UUID: %pU\n"
-	       "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
-	       "md:         (MaxDev:%u) \n",
-		le32_to_cpu(sb->level),
-		(unsigned long long)le64_to_cpu(sb->size),
-		le32_to_cpu(sb->raid_disks),
-		le32_to_cpu(sb->layout),
-		le32_to_cpu(sb->chunksize),
-		(unsigned long long)le64_to_cpu(sb->data_offset),
-		(unsigned long long)le64_to_cpu(sb->data_size),
-		(unsigned long long)le64_to_cpu(sb->super_offset),
-		(unsigned long long)le64_to_cpu(sb->recovery_offset),
-		le32_to_cpu(sb->dev_number),
-		uuid,
-		sb->devflags,
-		(unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
-		(unsigned long long)le64_to_cpu(sb->events),
-		(unsigned long long)le64_to_cpu(sb->resync_offset),
-		le32_to_cpu(sb->sb_csum),
-		le32_to_cpu(sb->max_dev)
-		);
-}
-
-static void print_rdev(struct md_rdev *rdev, int major_version)
-{
-	char b[BDEVNAME_SIZE];
-	printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
-	       bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
-	       test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
-	       rdev->desc_nr);
-	if (rdev->sb_loaded) {
-		printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
-		switch (major_version) {
-		case 0:
-			print_sb_90(page_address(rdev->sb_page));
-			break;
-		case 1:
-			print_sb_1(page_address(rdev->sb_page));
-			break;
-		}
-	} else
-		printk(KERN_INFO "md: no rdev superblock!\n");
-}
-
-static void md_print_devices(void)
-{
-	struct list_head *tmp;
-	struct md_rdev *rdev;
-	struct mddev *mddev;
-	char b[BDEVNAME_SIZE];
-
-	printk("\n");
-	printk("md:	**********************************\n");
-	printk("md:	* <COMPLETE RAID STATE PRINTOUT> *\n");
-	printk("md:	**********************************\n");
-	for_each_mddev(mddev, tmp) {
-
-		if (mddev->bitmap)
-			bitmap_print_sb(mddev->bitmap);
-		else
-			printk("%s: ", mdname(mddev));
-		rdev_for_each(rdev, mddev)
-			printk("<%s>", bdevname(rdev->bdev,b));
-		printk("\n");
-
-		rdev_for_each(rdev, mddev)
-			print_rdev(rdev, mddev->major_version);
-	}
-	printk("md:	**********************************\n");
-	printk("\n");
-}
-
 static void sync_sbs(struct mddev *mddev, int nospares)
 {
 	/* Update each superblock (in-memory image), but
@@ -6285,7 +6153,6 @@ static inline bool md_ioctl_valid(unsigned int cmd)
 	case GET_DISK_INFO:
 	case HOT_ADD_DISK:
 	case HOT_REMOVE_DISK:
-	case PRINT_RAID_DEBUG:
 	case RAID_AUTORUN:
 	case RAID_VERSION:
 	case RESTART_ARRAY_RW:
@@ -6331,11 +6198,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		err = get_version(argp);
 		goto out;
 
-	case PRINT_RAID_DEBUG:
-		err = 0;
-		md_print_devices();
-		goto out;
-
 #ifndef MODULE
 	case RAID_AUTORUN:
 		err = 0;
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h
index 4133e744e4e6..74e7c60c4716 100644
--- a/include/uapi/linux/raid/md_u.h
+++ b/include/uapi/linux/raid/md_u.h
@@ -39,7 +39,6 @@
 #define RAID_VERSION		_IOR (MD_MAJOR, 0x10, mdu_version_t)
 #define GET_ARRAY_INFO		_IOR (MD_MAJOR, 0x11, mdu_array_info_t)
 #define GET_DISK_INFO		_IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
-#define PRINT_RAID_DEBUG	_IO (MD_MAJOR, 0x13)
 #define RAID_AUTORUN		_IO (MD_MAJOR, 0x14)
 #define GET_BITMAP_FILE		_IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
 
-- 
cgit v1.2.3


From c15952dc18d8a293d976ac6c06d44d9d98023b45 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 14 Oct 2014 02:08:54 -0700
Subject: net: filter: move common defines into bpf_common.h

userspace programs that use eBPF instruction macros need to include two files:
uapi/linux/filter.h and uapi/linux/bpf.h
Move common macro definitions that are shared between classic BPF and eBPF
into uapi/linux/bpf_common.h, so that user app can include only one bpf.h file

Cc: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/Kbuild       |  1 +
 include/uapi/linux/bpf.h        |  1 +
 include/uapi/linux/bpf_common.h | 55 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/filter.h     | 56 +----------------------------------------
 4 files changed, 58 insertions(+), 55 deletions(-)
 create mode 100644 include/uapi/linux/bpf_common.h

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 70e150ebc6c9..1115d68b150f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -68,6 +68,7 @@ header-y += binfmts.h
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf.h
+header-y += bpf_common.h
 header-y += bpqether.h
 header-y += bsg.h
 header-y += btrfs.h
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 31b0ac208a52..d18316f9e9c4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -8,6 +8,7 @@
 #define _UAPI__LINUX_BPF_H__
 
 #include <linux/types.h>
+#include <linux/bpf_common.h>
 
 /* Extended instruction set based on top of classic BPF */
 
diff --git a/include/uapi/linux/bpf_common.h b/include/uapi/linux/bpf_common.h
new file mode 100644
index 000000000000..a5c220e0828f
--- /dev/null
+++ b/include/uapi/linux/bpf_common.h
@@ -0,0 +1,55 @@
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC        0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)  ((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+#define BPF_MODE(code)  ((code) & 0xe0)
+#define		BPF_IMM		0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code)    ((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_MOD		0x90
+#define		BPF_XOR		0xa0
+
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET        0x40
+#define BPF_SRC(code)   ((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 253b4d42cf2b..47785d5ecf17 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -7,7 +7,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
-
+#include <linux/bpf_common.h>
 
 /*
  * Current version of the filter code architecture.
@@ -32,56 +32,6 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 	struct sock_filter __user *filter;
 };
 
-/*
- * Instruction classes
- */
-
-#define BPF_CLASS(code) ((code) & 0x07)
-#define         BPF_LD          0x00
-#define         BPF_LDX         0x01
-#define         BPF_ST          0x02
-#define         BPF_STX         0x03
-#define         BPF_ALU         0x04
-#define         BPF_JMP         0x05
-#define         BPF_RET         0x06
-#define         BPF_MISC        0x07
-
-/* ld/ldx fields */
-#define BPF_SIZE(code)  ((code) & 0x18)
-#define         BPF_W           0x00
-#define         BPF_H           0x08
-#define         BPF_B           0x10
-#define BPF_MODE(code)  ((code) & 0xe0)
-#define         BPF_IMM         0x00
-#define         BPF_ABS         0x20
-#define         BPF_IND         0x40
-#define         BPF_MEM         0x60
-#define         BPF_LEN         0x80
-#define         BPF_MSH         0xa0
-
-/* alu/jmp fields */
-#define BPF_OP(code)    ((code) & 0xf0)
-#define         BPF_ADD         0x00
-#define         BPF_SUB         0x10
-#define         BPF_MUL         0x20
-#define         BPF_DIV         0x30
-#define         BPF_OR          0x40
-#define         BPF_AND         0x50
-#define         BPF_LSH         0x60
-#define         BPF_RSH         0x70
-#define         BPF_NEG         0x80
-#define		BPF_MOD		0x90
-#define		BPF_XOR		0xa0
-
-#define         BPF_JA          0x00
-#define         BPF_JEQ         0x10
-#define         BPF_JGT         0x20
-#define         BPF_JGE         0x30
-#define         BPF_JSET        0x40
-#define BPF_SRC(code)   ((code) & 0x08)
-#define         BPF_K           0x00
-#define         BPF_X           0x08
-
 /* ret - BPF_K and BPF_X also apply */
 #define BPF_RVAL(code)  ((code) & 0x18)
 #define         BPF_A           0x10
@@ -91,10 +41,6 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
-#ifndef BPF_MAXINSNS
-#define BPF_MAXINSNS 4096
-#endif
-
 /*
  * Macros for filter block array initializers.
  */
-- 
cgit v1.2.3


From f974008f07a62171a9dede08250c9a35c2b2b986 Mon Sep 17 00:00:00 2001
From: Olivier Gay <ogay@logitech.com>
Date: Sat, 18 Oct 2014 01:53:39 +0200
Subject: HID: add keyboard input assist hid usages

Add keyboard input assist controls usages from approved
hid usage table request HUTTR42:
http://www.usb.org/developers/hidpage/HUTRR42c.pdf

Signed-off-by: Olivier Gay <ogay@logitech.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c    | 6 ++++++
 drivers/hid/hid-input.c    | 7 +++++++
 include/uapi/linux/input.h | 7 +++++++
 3 files changed, 20 insertions(+)

(limited to 'include/uapi')

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 84c3cb15ccdd..8bf61d295ffd 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -946,6 +946,12 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
 	[KEY_BRIGHTNESS_MAX] = "BrightnessMax",
 	[KEY_BRIGHTNESS_AUTO] = "BrightnessAuto",
+	[KEY_KBDINPUTASSIST_PREV] = "KbdInputAssistPrev",
+	[KEY_KBDINPUTASSIST_NEXT] = "KbdInputAssistNext",
+	[KEY_KBDINPUTASSIST_PREVGROUP] = "KbdInputAssistPrevGroup",
+	[KEY_KBDINPUTASSIST_NEXTGROUP] = "KbdInputAssistNextGroup",
+	[KEY_KBDINPUTASSIST_ACCEPT] = "KbdInputAssistAccept",
+	[KEY_KBDINPUTASSIST_CANCEL] = "KbdInputAssistCancel",
 };
 
 static const char *relatives[REL_MAX + 1] = {
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 2df7fddbd119..56c6c30f2c7d 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -862,6 +862,13 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		case 0x28b: map_key_clear(KEY_FORWARDMAIL);	break;
 		case 0x28c: map_key_clear(KEY_SEND);		break;
 
+		case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV);		break;
+		case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT);		break;
+		case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP);		break;
+		case 0x2ca: map_key_clear(KEY_KBDINPUTASSIST_NEXTGROUP);		break;
+		case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT);	break;
+		case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL);	break;
+
 		default:    goto ignore;
 		}
 		break;
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 1874ebe9ac1e..a1d7e931ab72 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -739,6 +739,13 @@ struct input_keymap_entry {
 #define KEY_BRIGHTNESS_MIN		0x250	/* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX		0x251	/* Set Brightness to Maximum */
 
+#define KEY_KBDINPUTASSIST_PREV		0x260
+#define KEY_KBDINPUTASSIST_NEXT		0x261
+#define KEY_KBDINPUTASSIST_PREVGROUP		0x262
+#define KEY_KBDINPUTASSIST_NEXTGROUP		0x263
+#define KEY_KBDINPUTASSIST_ACCEPT		0x264
+#define KEY_KBDINPUTASSIST_CANCEL		0x265
+
 #define BTN_TRIGGER_HAPPY		0x2c0
 #define BTN_TRIGGER_HAPPY1		0x2c0
 #define BTN_TRIGGER_HAPPY2		0x2c1
-- 
cgit v1.2.3


From 0d7a855526dd672e114aff2ac22b60fc6f155b08 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Fri, 24 Oct 2014 00:14:37 +0200
Subject: vfs: add RENAME_WHITEOUT

This adds a new RENAME_WHITEOUT flag.  This flag makes rename() create a
whiteout of source.  The whiteout creation is atomic relative to the
rename.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/namei.c              | 8 ++++++--
 include/uapi/linux/fs.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/uapi')

diff --git a/fs/namei.c b/fs/namei.c
index d20191c0ebf5..42df664e95e5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4209,12 +4209,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
-	if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
+	if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
+	    (flags & RENAME_EXCHANGE))
 		return -EINVAL;
 
+	if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
+		return -EPERM;
+
 retry:
 	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
 	if (IS_ERR(from)) {
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index ca1a11bb4443..3735fa0a6784 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -37,6 +37,7 @@
 
 #define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
 #define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
 
 struct fstrim_range {
 	__u64 start;
-- 
cgit v1.2.3


From 607ec6a5abb637642e5b8199828f39ceae83cfb7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Fri, 24 Oct 2014 08:57:01 -0200
Subject: Revert "[media] v4l2-dv-timings: fix a sparse warning"

Sparse got a fix for that. Also, it is suspected that reverting
this patch might cause compilation breakages on userspace. So,
revert it.

This reverts commit 5c2cacc1028917168b0f7650008dceaa6f7e3fe2.

Requested-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index 6a0764c89fcb..6c8f159e416e 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -21,8 +21,17 @@
 #ifndef _V4L2_DV_TIMINGS_H
 #define _V4L2_DV_TIMINGS_H
 
+#if __GNUC__ < 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ < 6))
+/* Sadly gcc versions older than 4.6 have a bug in how they initialize
+   anonymous unions where they require additional curly brackets.
+   This violates the C1x standard. This workaround adds the curly brackets
+   if needed. */
 #define V4L2_INIT_BT_TIMINGS(_width, args...) \
 	{ .bt = { _width , ## args } }
+#else
+#define V4L2_INIT_BT_TIMINGS(_width, args...) \
+	.bt = { _width , ## args }
+#endif
 
 /* CEA-861-E timings (i.e. standard HDTV timings) */
 
-- 
cgit v1.2.3


From fcd964dda5ece2fa77f78f843bc3455348787282 Mon Sep 17 00:00:00 2001
From: Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Date: Tue, 7 Oct 2014 17:29:07 +0800
Subject: sched: Update comments for CLONE_NEWNS

Signed-off-by: Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/1412674147-8941-1-git-send-email-chenhanxiao@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 34f9d7387d13..b932be9f5c5b 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -13,7 +13,7 @@
 #define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
 #define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
 #define CLONE_THREAD	0x00010000	/* Same thread group? */
-#define CLONE_NEWNS	0x00020000	/* New namespace group? */
+#define CLONE_NEWNS	0x00020000	/* New mount namespace group */
 #define CLONE_SYSVSEM	0x00040000	/* share system V SEM_UNDO semantics */
 #define CLONE_SETTLS	0x00080000	/* create a new TLS for the child */
 #define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
-- 
cgit v1.2.3


From b438b1ab35507bbccc28d13f0b8286ffcf24019d Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Thu, 2 Oct 2014 22:16:36 -0700
Subject: perf: Fix typos in sample code in the perf_event.h header

struct perf_event_mmap_page has members called "index" and
"cap_user_rdpmc".  Spell them correctly in the examples.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/320ba26391a8123cc16e5f02d24d34bd404332fd.1412313343.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9269de254874..9d845404d875 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -364,7 +364,7 @@ struct perf_event_mmap_page {
 	/*
 	 * Bits needed to read the hw events in user-space.
 	 *
-	 *   u32 seq, time_mult, time_shift, idx, width;
+	 *   u32 seq, time_mult, time_shift, index, width;
 	 *   u64 count, enabled, running;
 	 *   u64 cyc, time_offset;
 	 *   s64 pmc = 0;
@@ -383,11 +383,11 @@ struct perf_event_mmap_page {
 	 *       time_shift  = pc->time_shift;
 	 *     }
 	 *
-	 *     idx = pc->index;
+	 *     index = pc->index;
 	 *     count = pc->offset;
-	 *     if (pc->cap_usr_rdpmc && idx) {
+	 *     if (pc->cap_user_rdpmc && index) {
 	 *       width = pc->pmc_width;
-	 *       pmc = rdpmc(idx - 1);
+	 *       pmc = rdpmc(index - 1);
 	 *     }
 	 *
 	 *     barrier();
@@ -415,7 +415,7 @@ struct perf_event_mmap_page {
 	};
 
 	/*
-	 * If cap_usr_rdpmc this field provides the bit-width of the value
+	 * If cap_user_rdpmc this field provides the bit-width of the value
 	 * read using the rdpmc() or equivalent instruction. This can be used
 	 * to sign extend the result like:
 	 *
@@ -439,10 +439,10 @@ struct perf_event_mmap_page {
 	 *
 	 * Where time_offset,time_mult,time_shift and cyc are read in the
 	 * seqcount loop described above. This delta can then be added to
-	 * enabled and possible running (if idx), improving the scaling:
+	 * enabled and possible running (if index), improving the scaling:
 	 *
 	 *   enabled += delta;
-	 *   if (idx)
+	 *   if (index)
 	 *     running += delta;
 	 *
 	 *   quot = count / running;
-- 
cgit v1.2.3


From 7071cf7fc435ab84df872613f613a9a055964fc1 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Sun, 2 Nov 2014 11:31:41 -0800
Subject: uapi: add missing network related headers to kbuild

The makefile for sanitizing kernel headers uses the kbuild file
to determine which files to do. Several networking related headers
were missing. Without these headers iproute2 build would break.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/Kbuild | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index b70237e8bc37..4c94f31a8c99 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -125,6 +125,7 @@ header-y += filter.h
 header-y += firewire-cdev.h
 header-y += firewire-constants.h
 header-y += flat.h
+header-y += fou.h
 header-y += fs.h
 header-y += fsl_hypervisor.h
 header-y += fuse.h
@@ -141,6 +142,7 @@ header-y += hid.h
 header-y += hiddev.h
 header-y += hidraw.h
 header-y += hpet.h
+header-y += hsr_netlink.h
 header-y += hyperv.h
 header-y += hysdn_if.h
 header-y += i2c-dev.h
@@ -251,6 +253,7 @@ header-y += mii.h
 header-y += minix_fs.h
 header-y += mman.h
 header-y += mmtimer.h
+header-y += mpls.h
 header-y += mqueue.h
 header-y += mroute.h
 header-y += mroute6.h
@@ -424,6 +427,7 @@ header-y += virtio_net.h
 header-y += virtio_pci.h
 header-y += virtio_ring.h
 header-y += virtio_rng.h
+header=y += vm_sockets.h
 header-y += vt.h
 header-y += wait.h
 header-y += wanrouter.h
-- 
cgit v1.2.3


From 66f1c44887ba4f47d617f8ae21cf8e04e1892bd7 Mon Sep 17 00:00:00 2001
From: Gregory Fong <gregory.0xf0@gmail.com>
Date: Tue, 4 Nov 2014 11:21:21 -0800
Subject: bridge: include in6.h in if_bridge.h for struct in6_addr

if_bridge.h uses struct in6_addr ip6, but wasn't including the in6.h
header.  Thomas Backlund originally sent a patch to do this, but this
revealed a redefinition issue: https://lkml.org/lkml/2013/1/13/116

The redefinition issue should have been fixed by the following Linux
commits:
ee262ad827f89e2dc7851ec2986953b5b125c6bc inet: defines IPPROTO_* needed for module alias generation
cfd280c91253cc28e4919e349fa7a813b63e71e8 net: sync some IP headers with glibc

and the following glibc commit:
6c82a2f8d7c8e21e39237225c819f182ae438db3 Coordinate IPv6 definitions for Linux and glibc

so actually include the header now.

Reported-by: Colin Guthrie <colin@mageia.org>
Reported-by: Christiaan Welvaart <cjw@daneel.dyndns.org>
Reported-by: Thomas Backlund <tmb@mageia.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Gregory Fong <gregory.0xf0@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 39f621a9fe82..da17e456908d 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/if_ether.h>
+#include <linux/in6.h>
 
 #define SYSFS_BRIDGE_ATTR	"bridge"
 #define SYSFS_BRIDGE_FDB	"brforward"
-- 
cgit v1.2.3