From 879aa24d6394aa04b690a600a41ff500441ad384 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 26 Nov 2010 06:47:28 -0300
Subject: [media] V4L: improve the BKL replacement heuristic

The BKL replacement mutex had some serious performance side-effects on
V4L drivers. It is replaced by a better heuristic that works around the
worst of the side-effects.

Read the v4l2-dev.c comments for the whole sorry story. This is a
temporary measure only until we can convert all v4l drivers to use
unlocked_ioctl.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/media/v4l2-device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
index 6648036b728d..b16f307d471a 100644
--- a/include/media/v4l2-device.h
+++ b/include/media/v4l2-device.h
@@ -51,6 +51,8 @@ struct v4l2_device {
 			unsigned int notification, void *arg);
 	/* The control handler. May be NULL. */
 	struct v4l2_ctrl_handler *ctrl_handler;
+	/* BKL replacement mutex. Temporary solution only. */
+	struct mutex ioctl_lock;
 };
 
 /* Initialize v4l2_dev and make dev->driver_data point to v4l2_dev.
-- 
cgit v1.2.3


From a757ee2216211278680dd8ac869aabe7b4a9970d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 2 Dec 2010 01:57:03 -0200
Subject: [media] Don't export format_by_forcc on two different drivers

Drivers should append their name on exported symbols, to avoid
conflicts with allyesconfig:

drivers/staging/built-in.o: In function `format_by_fourcc':
/home/v4l/work_trees/linus/drivers/staging/cx25821/cx25821-video.c:96: multiple definition of `format_by_fourcc'
drivers/media/built-in.o:/home/v4l/work_trees/linus/drivers/media/common/saa7146_video.c:88: first defined here

Let's rename both occurences with a small shellscript:

for i in drivers/staging/cx25821/*.[ch]; do sed s,format_by_fourcc,cx25821_format_by_fourcc,g <$i >a && mv a $i; done
for i in drivers/media/common/saa7146*.[ch]; do sed s,format_by_fourcc,saa7146_format_by_fourcc,g <$i >a && mv a $i; done
for i in include/media/saa7146*.[ch]; do sed s,format_by_fourcc,saa7146_format_by_fourcc,g <$i >a && mv a $i; done

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/saa7146_hlp.c      |  8 ++++----
 drivers/media/common/saa7146_video.c    | 16 ++++++++--------
 drivers/staging/cx25821/cx25821-video.c |  8 ++++----
 drivers/staging/cx25821/cx25821-video.h |  2 +-
 include/media/saa7146.h                 |  2 +-
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146_hlp.c b/drivers/media/common/saa7146_hlp.c
index 05bde9ccb770..1d1d8d200755 100644
--- a/drivers/media/common/saa7146_hlp.c
+++ b/drivers/media/common/saa7146_hlp.c
@@ -558,7 +558,7 @@ static void saa7146_set_window(struct saa7146_dev *dev, int width, int height, e
 static void saa7146_set_position(struct saa7146_dev *dev, int w_x, int w_y, int w_height, enum v4l2_field field, u32 pixelformat)
 {
 	struct saa7146_vv *vv = dev->vv_data;
-	struct saa7146_format *sfmt = format_by_fourcc(dev, pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev, pixelformat);
 
 	int b_depth = vv->ov_fmt->depth;
 	int b_bpl = vv->ov_fb.fmt.bytesperline;
@@ -702,7 +702,7 @@ static int calculate_video_dma_grab_packed(struct saa7146_dev* dev, struct saa71
 	struct saa7146_vv *vv = dev->vv_data;
 	struct saa7146_video_dma vdma1;
 
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	int width = buf->fmt->width;
 	int height = buf->fmt->height;
@@ -827,7 +827,7 @@ static int calculate_video_dma_grab_planar(struct saa7146_dev* dev, struct saa71
 	struct saa7146_video_dma vdma2;
 	struct saa7146_video_dma vdma3;
 
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	int width = buf->fmt->width;
 	int height = buf->fmt->height;
@@ -994,7 +994,7 @@ static void program_capture_engine(struct saa7146_dev *dev, int planar)
 
 void saa7146_set_capture(struct saa7146_dev *dev, struct saa7146_buf *buf, struct saa7146_buf *next)
 {
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 	struct saa7146_vv *vv = dev->vv_data;
 	u32 vdma1_prot_addr;
 
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index 741c5732b430..d246910129e8 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -84,7 +84,7 @@ static struct saa7146_format formats[] = {
 
 static int NUM_FORMATS = sizeof(formats)/sizeof(struct saa7146_format);
 
-struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc)
+struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc)
 {
 	int i, j = NUM_FORMATS;
 
@@ -266,7 +266,7 @@ static int saa7146_pgtable_build(struct saa7146_dev *dev, struct saa7146_buf *bu
 	struct videobuf_dmabuf *dma=videobuf_to_dma(&buf->vb);
 	struct scatterlist *list = dma->sglist;
 	int length = dma->sglen;
-	struct saa7146_format *sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+	struct saa7146_format *sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 	DEB_EE(("dev:%p, buf:%p, sg_len:%d\n",dev,buf,length));
 
@@ -408,7 +408,7 @@ static int video_begin(struct saa7146_fh *fh)
 		}
 	}
 
-	fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	/* we need to have a valid format set here */
 	BUG_ON(NULL == fmt);
 
@@ -460,7 +460,7 @@ static int video_end(struct saa7146_fh *fh, struct file *file)
 		return -EBUSY;
 	}
 
-	fmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	/* we need to have a valid format set here */
 	BUG_ON(NULL == fmt);
 
@@ -536,7 +536,7 @@ static int vidioc_s_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *f
 		return -EPERM;
 
 	/* check args */
-	fmt = format_by_fourcc(dev, fb->fmt.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev, fb->fmt.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
@@ -760,7 +760,7 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *fh, struct v4l2_forma
 
 	DEB_EE(("V4L2_BUF_TYPE_VIDEO_CAPTURE: dev:%p, fh:%p\n", dev, fh));
 
-	fmt = format_by_fourcc(dev, f->fmt.pix.pixelformat);
+	fmt = saa7146_format_by_fourcc(dev, f->fmt.pix.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
@@ -1264,7 +1264,7 @@ static int buffer_prepare(struct videobuf_queue *q,
 		buf->fmt       = &fh->video_fmt;
 		buf->vb.field  = fh->video_fmt.field;
 
-		sfmt = format_by_fourcc(dev,buf->fmt->pixelformat);
+		sfmt = saa7146_format_by_fourcc(dev,buf->fmt->pixelformat);
 
 		release_all_pagetables(dev, buf);
 		if( 0 != IS_PLANAR(sfmt->trans)) {
@@ -1378,7 +1378,7 @@ static int video_open(struct saa7146_dev *dev, struct file *file)
 	fh->video_fmt.pixelformat = V4L2_PIX_FMT_BGR24;
 	fh->video_fmt.bytesperline = 0;
 	fh->video_fmt.field = V4L2_FIELD_ANY;
-	sfmt = format_by_fourcc(dev,fh->video_fmt.pixelformat);
+	sfmt = saa7146_format_by_fourcc(dev,fh->video_fmt.pixelformat);
 	fh->video_fmt.sizeimage = (fh->video_fmt.width * fh->video_fmt.height * sfmt->depth)/8;
 
 	videobuf_queue_sg_init(&fh->video_q, &video_qops,
diff --git a/drivers/staging/cx25821/cx25821-video.c b/drivers/staging/cx25821/cx25821-video.c
index e7f1d5778cec..52389308f333 100644
--- a/drivers/staging/cx25821/cx25821-video.c
+++ b/drivers/staging/cx25821/cx25821-video.c
@@ -92,7 +92,7 @@ int cx25821_get_format_size(void)
 	return ARRAY_SIZE(formats);
 }
 
-struct cx25821_fmt *format_by_fourcc(unsigned int fourcc)
+struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc)
 {
 	unsigned int i;
 
@@ -848,7 +848,7 @@ static int video_open(struct file *file)
        pix_format =
 	   (dev->channels[ch_id].pixel_formats ==
 	    PIXEL_FRMT_411) ? V4L2_PIX_FMT_Y41P : V4L2_PIX_FMT_YUYV;
-       fh->fmt = format_by_fourcc(pix_format);
+       fh->fmt = cx25821_format_by_fourcc(pix_format);
 
        v4l2_prio_open(&dev->channels[ch_id].prio, &fh->prio);
 
@@ -1010,7 +1010,7 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *priv,
        if (0 != err)
 	       return err;
 
-       fh->fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+       fh->fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat);
        fh->vidq.field = f->fmt.pix.field;
 
        /* check if width and height is valid based on set standard */
@@ -1119,7 +1119,7 @@ int cx25821_vidioc_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_fo
 	enum v4l2_field field;
 	unsigned int maxw, maxh;
 
-	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+	fmt = cx25821_format_by_fourcc(f->fmt.pix.pixelformat);
 	if (NULL == fmt)
 		return -EINVAL;
 
diff --git a/drivers/staging/cx25821/cx25821-video.h b/drivers/staging/cx25821/cx25821-video.h
index cc6034b1a95d..a2415d33235b 100644
--- a/drivers/staging/cx25821/cx25821-video.h
+++ b/drivers/staging/cx25821/cx25821-video.h
@@ -87,7 +87,7 @@ extern unsigned int vid_limit;
 
 #define FORMAT_FLAGS_PACKED       0x01
 extern struct cx25821_fmt formats[];
-extern struct cx25821_fmt *format_by_fourcc(unsigned int fourcc);
+extern struct cx25821_fmt *cx25821_format_by_fourcc(unsigned int fourcc);
 extern struct cx25821_data timeout_data[MAX_VID_CHANNEL_NUM];
 
 extern void cx25821_dump_video_queue(struct cx25821_dev *dev,
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index 7a9f76ecbbbd..ac7ce00f39cf 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -161,7 +161,7 @@ extern struct list_head saa7146_devices;
 extern struct mutex saa7146_devices_lock;
 int saa7146_register_extension(struct saa7146_extension*);
 int saa7146_unregister_extension(struct saa7146_extension*);
-struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc);
+struct saa7146_format* saa7146_format_by_fourcc(struct saa7146_dev *dev, int fourcc);
 int saa7146_pgtable_alloc(struct pci_dev *pci, struct saa7146_pgtable *pt);
 void saa7146_pgtable_free(struct pci_dev *pci, struct saa7146_pgtable *pt);
 int saa7146_pgtable_build_single(struct pci_dev *pci, struct saa7146_pgtable *pt, struct scatterlist *list, int length );
-- 
cgit v1.2.3


From 88d60c32765716289abeb362c44adf6c35c6824c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 8 Nov 2010 18:19:22 -0500
Subject: fanotify: remove packed from access response message

Since fanotify has decided to be careful about alignment and packing
rather than rely on __attribute__((packed)) for multiarch support.
Since this attribute isn't doing anything on fanotify_response we just
drop it.  This does not break API/ABI.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@sophos.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fanotify.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 0f0121467fc4..bdbf9bb29b54 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -96,7 +96,7 @@ struct fanotify_event_metadata {
 struct fanotify_response {
 	__s32 fd;
 	__u32 response;
-} __attribute__ ((packed));
+};
 
 /* Legit userspace responses to a _PERM event */
 #define FAN_ALLOW	0x01
-- 
cgit v1.2.3


From b1085ba80cd2784400a7beec3fda5099198ed01c Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Fri, 5 Nov 2010 17:05:27 +0100
Subject: fanotify: if set by user unset FMODE_NONOTIFY before fsnotify_perm()
 is called

Unsetting FMODE_NONOTIFY in fsnotify_open() is too late, since fsnotify_perm()
is called before. If FMODE_NONOTIFY is set fsnotify_perm() will skip permission
checks, so a user can still disable permission checks by setting this flag
in an open() call.
This patch corrects this by unsetting the flag before fsnotify_perm is called.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/namei.c               | 3 +++
 include/linux/fsnotify.h | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 5362af9b7372..4ff7ca530533 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (!(open_flag & O_CREAT))
 		mode = 0;
 
+	/* Must never be set by userspace */
+	open_flag &= ~FMODE_NONOTIFY;
+
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
 	 * check for O_DSYNC if the need any syncing at all we enforce it's
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 5c185fa27089..b10bcdeaef76 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -235,9 +235,6 @@ static inline void fsnotify_open(struct file *file)
 	if (S_ISDIR(inode->i_mode))
 		mask |= FS_ISDIR;
 
-	/* FMODE_NONOTIFY must never be set from user */
-	file->f_mode &= ~FMODE_NONOTIFY;
-
 	fsnotify_parent(path, NULL, mask);
 	fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0);
 }
-- 
cgit v1.2.3


From 09e5f14e57c70f9d357862bb56e57026c51092a1 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Fri, 19 Nov 2010 10:58:07 +0100
Subject: fanotify: on group destroy allow all waiters to bypass permission
 check

When fanotify_release() is called, there may still be processes waiting for
access permission. Currently only processes for which an event has already been
queued into the groups access list will be woken up.  Processes for which no
event has been queued will continue to sleep and thus cause a deadlock when
fsnotify_put_group() is called.
Furthermore there is a race allowing further processes to be waiting on the
access wait queue after wake_up (if they arrive before clear_marks_by_group()
is called).
This patch corrects this by setting a flag to inform processes that the group
is about to be destroyed and thus not to wait for access permission.

[additional changelog from eparis]
Lets think about the 4 relevant code paths from the PoV of the
'operator' 'listener' 'responder' and 'closer'.  Where operator is the
process doing an action (like open/read) which could require permission.
Listener is the task (or in this case thread) slated with reading from
the fanotify file descriptor.  The 'responder' is the thread responsible
for responding to access requests.  'Closer' is the thread attempting to
close the fanotify file descriptor.

The 'operator' is going to end up in:
fanotify_handle_event()
  get_response_from_access()
    (THIS BLOCKS WAITING ON USERSPACE)

The 'listener' interesting code path
fanotify_read()
  copy_event_to_user()
    prepare_for_access_response()
      (THIS CREATES AN fanotify_response_event)

The 'responder' code path:
fanotify_write()
  process_access_response()
    (REMOVE A fanotify_response_event, SET RESPONSE, WAKE UP 'operator')

The 'closer':
fanotify_release()
  (SUPPOSED TO CLEAN UP THE REST OF THIS MESS)

What we have today is that in the closer we remove all of the
fanotify_response_events and set a bit so no more response events are
ever created in prepare_for_access_response().

The bug is that we never wake all of the operators up and tell them to
move along.  You fix that in fanotify_get_response_from_access().  You
also fix other operators which haven't gotten there yet.  So I agree
that's a good fix.
[/additional changelog from eparis]

[remove additional changes to minimize patch size]
[move initialization so it was inside CONFIG_FANOTIFY_PERMISSION]

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c      | 6 +++++-
 fs/notify/fanotify/fanotify_user.c | 5 +++--
 include/linux/fsnotify_backend.h   | 2 +-
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88eed09e..f35794b97e8e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response);
+	wait_event(group->fanotify_data.access_waitq, event->response ||
+				atomic_read(&group->fanotify_data.bypass_perm));
+
+	if (!event->response) /* bypass_perm set */
+		return 0;
 
 	/* userspace responded, convert to something usable */
 	spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 480434c5ee5f..01fffe62a2d4 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -200,7 +200,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	if (group->fanotify_data.bypass_perm) {
+	if (atomic_read(&group->fanotify_data.bypass_perm)) {
 		mutex_unlock(&group->fanotify_data.access_mutex);
 		kmem_cache_free(fanotify_response_event_cache, re);
 		event->response = FAN_ALLOW;
@@ -390,7 +390,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	group->fanotify_data.bypass_perm = true;
+	atomic_inc(&group->fanotify_data.bypass_perm);
 
 	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
 		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -703,6 +703,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0a68f924f06f..7380763595d3 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -166,7 +166,7 @@ struct fsnotify_group {
 			struct mutex access_mutex;
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
-			bool bypass_perm; /* protected by access_mutex */
+			atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 			int f_flags;
 			unsigned int max_marks;
-- 
cgit v1.2.3


From e9a3854fd4ff3907e6c200a3980e19365ee695e9 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Wed, 24 Nov 2010 18:22:09 +0100
Subject: fanotify: Introduce FAN_NOFD

FAN_NOFD is used in fanotify events that do not provide an open file
descriptor (like the overflow_event).

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fanotify.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index bdbf9bb29b54..c73224315aee 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -101,6 +101,8 @@ struct fanotify_response {
 /* Legit userspace responses to a _PERM event */
 #define FAN_ALLOW	0x01
 #define FAN_DENY	0x02
+/* No fd set in event */
+#define FAN_NOFD	-1
 
 /* Helper functions to deal with fanotify_event_metadata buffers */
 #define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
-- 
cgit v1.2.3


From 5167695753c63444a9e6cbbef136200a16c7a225 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 7 Dec 2010 14:18:20 +0100
Subject: perf: Fix duplicate events with multiple-pmu vs software events

Because the multi-pmu bits can share contexts between struct pmu
instances we could get duplicate events by iterating the pmu list.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  1 +
 kernel/perf_event.c        | 35 +++++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index de2c41758e29..4f1279e105ee 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -887,6 +887,7 @@ struct perf_cpu_context {
 	int				exclusive;
 	struct list_head		rotation_list;
 	int				jiffies_interval;
+	struct pmu			*active_pmu;
 };
 
 struct perf_output_handle {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335..7b870174c56d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3824,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_task_ctx(&cpuctx->ctx, task_event);
 
 		ctx = task_event->task_ctx;
@@ -3959,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
 		ctxn = pmu->task_ctx_nr;
@@ -4144,6 +4148,8 @@ got_name:
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
 					vma->vm_flags & VM_EXEC);
 
@@ -5145,20 +5151,36 @@ static void *find_pmu_context(int ctxn)
 	return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-	struct pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+		if (cpuctx->active_pmu == old_pmu)
+			cpuctx->active_pmu = pmu;
+	}
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+	struct pmu *i;
 
 	mutex_lock(&pmus_lock);
 	/*
 	 * Like a real lame refcount.
 	 */
-	list_for_each_entry(pmu, &pmus, entry) {
-		if (pmu->pmu_cpu_context == cpu_context)
+	list_for_each_entry(i, &pmus, entry) {
+		if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+			update_pmu_context(i, pmu);
 			goto out;
+		}
 	}
 
-	free_percpu(cpu_context);
+	free_percpu(pmu->pmu_cpu_context);
 out:
 	mutex_unlock(&pmus_lock);
 }
@@ -5190,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
+		cpuctx->active_pmu = pmu;
 	}
 
 got_cpu_context:
@@ -5241,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	free_pmu_context(pmu->pmu_cpu_context);
+	free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
-- 
cgit v1.2.3


From 0f004f5a696a9434b7214d0d3cbd0525ee77d428 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 30 Nov 2010 19:48:45 +0100
Subject: sched: Cure more NO_HZ load average woes

There's a long-running regression that proved difficult to fix and
which is hitting certain people and is rather annoying in its effects.

Damien reported that after 74f5187ac8 (sched: Cure load average vs
NO_HZ woes) his load average is unnaturally high, he also noted that
even with that patch reverted the load avgerage numbers are not
correct.

The problem is that the previous patch only solved half the NO_HZ
problem, it addressed the part of going into NO_HZ mode, not of
comming out of NO_HZ mode. This patch implements that missing half.

When comming out of NO_HZ mode there are two important things to take
care of:

 - Folding the pending idle delta into the global active count.
 - Correctly aging the averages for the idle-duration.

So with this patch the NO_HZ interaction should be complete and
behaviour between CONFIG_NO_HZ=[yn] should be equivalent.

Furthermore, this patch slightly changes the load average computation
by adding a rounding term to the fixed point multiplication.

Reported-by: Damien Wyart <damien.wyart@free.fr>
Reported-by: Tim McGrath <tmhikaru@gmail.com>
Tested-by: Damien Wyart <damien.wyart@free.fr>
Tested-by: Orion Poplawski <orion@cora.nwra.com>
Tested-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org
Cc: Chase Douglas <chase.douglas@canonical.com>
LKML-Reference: <1291129145.32004.874.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   2 +-
 kernel/sched.c        | 150 ++++++++++++++++++++++++++++++++++++++++++++++----
 kernel/timer.c        |   2 +-
 3 files changed, 141 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c79e921a68b..223874538b33 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern unsigned long this_cpu_load(void);
 
 
-extern void calc_global_load(void);
+extern void calc_global_load(unsigned long ticks);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index dc91a4d09ac3..6b7c26a1a097 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3119,6 +3119,15 @@ static long calc_load_fold_active(struct rq *this_rq)
 	return delta;
 }
 
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	load += 1UL << (FSHIFT - 1);
+	return load >> FSHIFT;
+}
+
 #ifdef CONFIG_NO_HZ
 /*
  * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3148,6 +3157,128 @@ static long calc_load_fold_idle(void)
 
 	return delta;
 }
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) for (;;) {
+		if (n & 1) {
+			result *= x;
+			result += 1UL << (frac_bits - 1);
+			result >>= frac_bits;
+		}
+		n >>= 1;
+		if (!n)
+			break;
+		x *= x;
+		x += 1UL << (frac_bits - 1);
+		x >>= frac_bits;
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+	long delta, active, n;
+
+	if (time_before(jiffies, calc_load_update))
+		return;
+
+	/*
+	 * If we crossed a calc_load_update boundary, make sure to fold
+	 * any pending idle changes, the respective CPUs might have
+	 * missed the tick driven calc_load_account_active() update
+	 * due to NO_HZ.
+	 */
+	delta = calc_load_fold_idle();
+	if (delta)
+		atomic_long_add(delta, &calc_load_tasks);
+
+	/*
+	 * If we were idle for multiple load cycles, apply them.
+	 */
+	if (ticks >= LOAD_FREQ) {
+		n = ticks / LOAD_FREQ;
+
+		active = atomic_long_read(&calc_load_tasks);
+		active = active > 0 ? active * FIXED_1 : 0;
+
+		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+		calc_load_update += n * LOAD_FREQ;
+	}
+
+	/*
+	 * Its possible the remainder of the above division also crosses
+	 * a LOAD_FREQ period, the regular check in calc_global_load()
+	 * which comes after this will take care of that.
+	 *
+	 * Consider us being 11 ticks before a cycle completion, and us
+	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+	 * age us 4 cycles, and the test in calc_global_load() will
+	 * pick up the final one.
+	 */
+}
 #else
 static void calc_load_account_idle(struct rq *this_rq)
 {
@@ -3157,6 +3288,10 @@ static inline long calc_load_fold_idle(void)
 {
 	return 0;
 }
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
 #endif
 
 /**
@@ -3174,24 +3309,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 	loads[2] = (avenrun[2] + offset) << shift;
 }
 
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	return load >> FSHIFT;
-}
-
 /*
  * calc_load - update the avenrun load estimates 10 ticks after the
  * CPUs have updated calc_load_tasks.
  */
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
 {
-	unsigned long upd = calc_load_update + 10;
 	long active;
 
-	if (time_before(jiffies, upd))
+	calc_global_nohz(ticks);
+
+	if (time_before(jiffies, calc_load_update + 10))
 		return;
 
 	active = atomic_long_read(&calc_load_tasks);
diff --git a/kernel/timer.c b/kernel/timer.c
index 68a9ae7679b7..7bd715fda974 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1319,7 +1319,7 @@ void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
 	update_wall_time();
-	calc_global_load();
+	calc_global_load(ticks);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
-- 
cgit v1.2.3


From 53dde5f385bc56e312f78b7cb25ffaf8efd4735d Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 16 Nov 2010 13:23:50 -0800
Subject: bootmem: Add alloc_bootmem_align()

Add an alloc_bootmem_align() interface to allocate bootmem with
specified alignment.  This is necessary to be able to allocate the
xsave area in a subsequent patch.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <20101116212441.977574826@sbsiddha-MOBL3.sc.intel.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: <stable@kernel.org>
---
 include/linux/bootmem.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 266ab9291232..499dfe982a0e 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -105,6 +105,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_align(x, align) \
+	__alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_nopanic(x) \
 	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages(x) \
-- 
cgit v1.2.3


From ab4e0192196b8d4e43a3945742d4996da934a86f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 14 Dec 2010 23:53:21 -0800
Subject: Input: define separate EVIOCGKEYCODE_V2/EVIOCSKEYCODE_V2

The desire to keep old names for the EVIOCGKEYCODE/EVIOCSKEYCODE while
extending them to support large scancodes was a mistake. While we tried
to keep ABI intact (and we succeeded in doing that, programs compiled
on older kernels will work on newer ones) there is still a problem with
recompiling existing software with newer kernel headers.

New kernel headers will supply updated ioctl numbers and kernel will
expect that userspace will use struct input_keymap_entry to set and
retrieve keymap data. But since the names of ioctls are still the same
userspace will happily compile even if not adjusted to make use of the
new structure and will start miraculously fail in the field.

To avoid this issue let's revert EVIOCGKEYCODE/EVIOCSKEYCODE definitions
and add EVIOCGKEYCODE_V2/EVIOCSKEYCODE_V2 so that userspace can explicitly
select the style of ioctls it wants to employ.

Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Acked-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/evdev.c | 113 ++++++++++++++++++++++++++------------------------
 include/linux/input.h |   6 ++-
 2 files changed, 62 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index e3f7fc6f9565..68f09a868434 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -534,76 +534,73 @@ static int handle_eviocgbit(struct input_dev *dev,
 }
 #undef OLD_KEY_MAX
 
-static int evdev_handle_get_keycode(struct input_dev *dev,
-				    void __user *p, size_t size)
+static int evdev_handle_get_keycode(struct input_dev *dev, void __user *p)
 {
-	struct input_keymap_entry ke;
+	struct input_keymap_entry ke = {
+		.len	= sizeof(unsigned int),
+		.flags	= 0,
+	};
+	int __user *ip = (int __user *)p;
 	int error;
 
-	memset(&ke, 0, sizeof(ke));
-
-	if (size == sizeof(unsigned int[2])) {
-		/* legacy case */
-		int __user *ip = (int __user *)p;
+	/* legacy case */
+	if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
+		return -EFAULT;
 
-		if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
-			return -EFAULT;
+	error = input_get_keycode(dev, &ke);
+	if (error)
+		return error;
 
-		ke.len = sizeof(unsigned int);
-		ke.flags = 0;
+	if (put_user(ke.keycode, ip + 1))
+		return -EFAULT;
 
-		error = input_get_keycode(dev, &ke);
-		if (error)
-			return error;
+	return 0;
+}
 
-		if (put_user(ke.keycode, ip + 1))
-			return -EFAULT;
+static int evdev_handle_get_keycode_v2(struct input_dev *dev, void __user *p)
+{
+	struct input_keymap_entry ke;
+	int error;
 
-	} else {
-		size = min(size, sizeof(ke));
+	if (copy_from_user(&ke, p, sizeof(ke)))
+		return -EFAULT;
 
-		if (copy_from_user(&ke, p, size))
-			return -EFAULT;
+	error = input_get_keycode(dev, &ke);
+	if (error)
+		return error;
 
-		error = input_get_keycode(dev, &ke);
-		if (error)
-			return error;
+	if (copy_to_user(p, &ke, sizeof(ke)))
+		return -EFAULT;
 
-		if (copy_to_user(p, &ke, size))
-			return -EFAULT;
-	}
 	return 0;
 }
 
-static int evdev_handle_set_keycode(struct input_dev *dev,
-				    void __user *p, size_t size)
+static int evdev_handle_set_keycode(struct input_dev *dev, void __user *p)
 {
-	struct input_keymap_entry ke;
-
-	memset(&ke, 0, sizeof(ke));
+	struct input_keymap_entry ke = {
+		.len	= sizeof(unsigned int),
+		.flags	= 0,
+	};
+	int __user *ip = (int __user *)p;
 
-	if (size == sizeof(unsigned int[2])) {
-		/* legacy case */
-		int __user *ip = (int __user *)p;
+	if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
+		return -EFAULT;
 
-		if (copy_from_user(ke.scancode, p, sizeof(unsigned int)))
-			return -EFAULT;
+	if (get_user(ke.keycode, ip + 1))
+		return -EFAULT;
 
-		if (get_user(ke.keycode, ip + 1))
-			return -EFAULT;
+	return input_set_keycode(dev, &ke);
+}
 
-		ke.len = sizeof(unsigned int);
-		ke.flags = 0;
+static int evdev_handle_set_keycode_v2(struct input_dev *dev, void __user *p)
+{
+	struct input_keymap_entry ke;
 
-	} else {
-		size = min(size, sizeof(ke));
+	if (copy_from_user(&ke, p, sizeof(ke)))
+		return -EFAULT;
 
-		if (copy_from_user(&ke, p, size))
-			return -EFAULT;
-
-		if (ke.len > sizeof(ke.scancode))
-			return -EINVAL;
-	}
+	if (ke.len > sizeof(ke.scancode))
+		return -EINVAL;
 
 	return input_set_keycode(dev, &ke);
 }
@@ -669,6 +666,18 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 			return evdev_grab(evdev, client);
 		else
 			return evdev_ungrab(evdev, client);
+
+	case EVIOCGKEYCODE:
+		return evdev_handle_get_keycode(dev, p);
+
+	case EVIOCSKEYCODE:
+		return evdev_handle_set_keycode(dev, p);
+
+	case EVIOCGKEYCODE_V2:
+		return evdev_handle_get_keycode_v2(dev, p);
+
+	case EVIOCSKEYCODE_V2:
+		return evdev_handle_set_keycode_v2(dev, p);
 	}
 
 	size = _IOC_SIZE(cmd);
@@ -708,12 +717,6 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
 			return -EFAULT;
 
 		return error;
-
-	case EVIOC_MASK_SIZE(EVIOCGKEYCODE):
-		return evdev_handle_get_keycode(dev, p, size);
-
-	case EVIOC_MASK_SIZE(EVIOCSKEYCODE):
-		return evdev_handle_set_keycode(dev, p, size);
 	}
 
 	/* Multi-number variable-length handlers */
diff --git a/include/linux/input.h b/include/linux/input.h
index a8af21d42bc1..9777668883be 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -104,8 +104,10 @@ struct input_keymap_entry {
 #define EVIOCGREP		_IOR('E', 0x03, unsigned int[2])	/* get repeat settings */
 #define EVIOCSREP		_IOW('E', 0x03, unsigned int[2])	/* set repeat settings */
 
-#define EVIOCGKEYCODE		_IOR('E', 0x04, struct input_keymap_entry)	/* get keycode */
-#define EVIOCSKEYCODE		_IOW('E', 0x04, struct input_keymap_entry)	/* set keycode */
+#define EVIOCGKEYCODE		_IOR('E', 0x04, unsigned int[2])        /* get keycode */
+#define EVIOCGKEYCODE_V2	_IOR('E', 0x04, struct input_keymap_entry)
+#define EVIOCSKEYCODE		_IOW('E', 0x04, unsigned int[2])        /* set keycode */
+#define EVIOCSKEYCODE_V2	_IOW('E', 0x04, struct input_keymap_entry)
 
 #define EVIOCGNAME(len)		_IOC(_IOC_READ, 'E', 0x06, len)		/* get device name */
 #define EVIOCGPHYS(len)		_IOC(_IOC_READ, 'E', 0x07, len)		/* get physical location */
-- 
cgit v1.2.3


From 62731fa0c893515dc6cbc3e0a2879a92793c735f Mon Sep 17 00:00:00 2001
From: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Date: Mon, 22 Nov 2010 00:33:03 +0000
Subject: fanotify: split version into version and metadata_len

To implement per event type optional headers we are interested in
knowing how long the metadata structure is.  This patch slits the __u32
version field into a __u8 version and a __u16 metadata_len field (with
__u8 left over).  This should allow for backwards compat ABI.

Signed-off-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
[rewrote descrtion and changed object sizes and ordering - eparis]
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/fanotify.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index c73224315aee..6c6133f76e16 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -83,11 +83,13 @@
 				 FAN_ALL_PERM_EVENTS |\
 				 FAN_Q_OVERFLOW)
 
-#define FANOTIFY_METADATA_VERSION	2
+#define FANOTIFY_METADATA_VERSION	3
 
 struct fanotify_event_metadata {
 	__u32 event_len;
-	__u32 vers;
+	__u8 vers;
+	__u8 reserved;
+	__u16 metadata_len;
 	__aligned_u64 mask;
 	__s32 fd;
 	__s32 pid;
-- 
cgit v1.2.3


From f08f5a0add20834d3f3d876dfe08005a5df656db Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 16 Dec 2010 17:11:58 +0100
Subject: PM / Runtime: Fix pm_runtime_suspended()

There are some situations (e.g. in __pm_generic_call()), where
pm_runtime_suspended() is used to decide whether or not to execute
a device's (system) ->suspend() callback.  The callback is not
executed if pm_runtime_suspended() returns true, but it does so
for devices that don't even support runtime PM, because the
power.disable_depth device field is ignored by it.  This leads to
problems (i.e. devices are not suspened when they should), so rework
pm_runtime_suspended() so that it returns false if the device's
power.disable_depth field is different from zero.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@kernel.org
---
 Documentation/power/runtime_pm.txt | 4 ++--
 include/linux/pm_runtime.h         | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 489e9bacd165..41cc7b30d7dd 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -379,8 +379,8 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
       zero)
 
   bool pm_runtime_suspended(struct device *dev);
-    - return true if the device's runtime PM status is 'suspended', or false
-      otherwise
+    - return true if the device's runtime PM status is 'suspended' and its
+      'power.disable_depth' field is equal to zero, or false otherwise
 
   void pm_runtime_allow(struct device *dev);
     - set the power.runtime_auto flag for the device and decrease its usage
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 3ec2358f8692..d19f1cca7f74 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -77,7 +77,8 @@ static inline void device_set_run_wake(struct device *dev, bool enable)
 
 static inline bool pm_runtime_suspended(struct device *dev)
 {
-	return dev->power.runtime_status == RPM_SUSPENDED;
+	return dev->power.runtime_status == RPM_SUSPENDED
+		&& !dev->power.disable_depth;
 }
 
 static inline void pm_runtime_mark_last_busy(struct device *dev)
-- 
cgit v1.2.3


From 3f84622d7c7818077f5e6cf4b8a0d1b10dc65147 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 27 Nov 2010 19:26:32 +0100
Subject: SSB: Fix nvram_get on BCM47xx platform

The nvram_get function was never in the mainline kernel, it only existed in
an external OpenWrt patch. Use nvram_getenv function, which is in mainline
and use an include instead of an extra function declaration.  et0macaddr
contains the mac address in text from like 00:11:22:33:44:55. We have to
parse it before adding it into macaddr.

nvram_parse_macaddr will be merged into asm/mach-bcm47xx/nvram.h through
the MIPS git tree and will be available soon. It will not build now without
nvram_parse_macaddr, but it hasn't before either.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
To: linux-mips@linux-mips.org
Cc: mb@bu3sch.de
Cc: netdev@vger.kernel.org
Cc: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Michael Buesch <mb@bu3sch.de>
Patchwork: https://patchwork.linux-mips.org/patch/1849/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/linux/ssb/ssb_driver_gige.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 942e38736901..eba52a100533 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -96,16 +96,21 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev)
 	return 0;
 }
 
-extern char * nvram_get(const char *name);
+#ifdef CONFIG_BCM47XX
+#include <asm/mach-bcm47xx/nvram.h>
 /* Get the device MAC address */
 static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
-#ifdef CONFIG_BCM47XX
-	char *res = nvram_get("et0macaddr");
-	if (res)
-		memcpy(macaddr, res, 6);
-#endif
+	char buf[20];
+	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0)
+		return;
+	nvram_parse_macaddr(buf, macaddr);
 }
+#else
+static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+{
+}
+#endif
 
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
-- 
cgit v1.2.3


From fcbdf09d9652c8919dcf47072e3ae7dcb4eb98ac Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Thu, 16 Dec 2010 14:26:56 -0800
Subject: net: fix nulls list corruptions in sk_prot_alloc

Special care is taken inside sk_port_alloc to avoid overwriting
skc_node/skc_nulls_node. We should also avoid overwriting
skc_bind_node/skc_portaddr_node.

The patch fixes the following crash:

 BUG: unable to handle kernel paging request at fffffffffffffff0
 IP: [<ffffffff812ec6dd>] udp4_lib_lookup2+0xad/0x370
 [<ffffffff812ecc22>] __udp4_lib_lookup+0x282/0x360
 [<ffffffff812ed63e>] __udp4_lib_rcv+0x31e/0x700
 [<ffffffff812bba45>] ? ip_local_deliver_finish+0x65/0x190
 [<ffffffff812bbbf8>] ? ip_local_deliver+0x88/0xa0
 [<ffffffff812eda35>] udp_rcv+0x15/0x20
 [<ffffffff812bba45>] ip_local_deliver_finish+0x65/0x190
 [<ffffffff812bbbf8>] ip_local_deliver+0x88/0xa0
 [<ffffffff812bb2cd>] ip_rcv_finish+0x32d/0x6f0
 [<ffffffff8128c14c>] ? netif_receive_skb+0x99c/0x11c0
 [<ffffffff812bb94b>] ip_rcv+0x2bb/0x350
 [<ffffffff8128c14c>] netif_receive_skb+0x99c/0x11c0

Signed-off-by: Leonard Crestez <lcrestez@ixiacom.com>
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  3 +++
 net/core/sock.c    | 47 +++++++++++++++++++++++++++++++++++------------
 net/ipv4/udp.c     |  1 +
 net/ipv4/udplite.c |  1 +
 net/ipv6/udp.c     |  1 +
 net/ipv6/udplite.c |  1 +
 6 files changed, 42 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 659d968d95c5..7d3f7ce239b5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -754,6 +754,7 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+	void			(*clear_sk)(struct sock *sk, int size);
 
 	/* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
 	sk->sk_prot->hash(sk);
 }
 
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
+
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
diff --git a/net/core/sock.c b/net/core/sock.c
index fb6080111461..e5af8d5d5b50 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
 #endif
 }
 
+/*
+ * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * un-modified. Special care is taken when initializing object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+	if (offsetof(struct sock, sk_node.next) != 0)
+		memset(sk, 0, offsetof(struct sock, sk_node.next));
+	memset(&sk->sk_node.pprev, 0,
+	       size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+	unsigned long nulls1, nulls2;
+
+	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+	if (nulls1 > nulls2)
+		swap(nulls1, nulls2);
+
+	if (nulls1 != 0)
+		memset((char *)sk, 0, nulls1);
+	memset((char *)sk + nulls1 + sizeof(void *), 0,
+	       nulls2 - nulls1 - sizeof(void *));
+	memset((char *)sk + nulls2 + sizeof(void *), 0,
+	       size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		int family)
 {
@@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		if (!sk)
 			return sk;
 		if (priority & __GFP_ZERO) {
-			/*
-			 * caches using SLAB_DESTROY_BY_RCU should let
-			 * sk_node.next un-modified. Special care is taken
-			 * when initializing object to zero.
-			 */
-			if (offsetof(struct sock, sk_node.next) != 0)
-				memset(sk, 0, offsetof(struct sock, sk_node.next));
-			memset(&sk->sk_node.pprev, 0,
-			       prot->obj_size - offsetof(struct sock,
-							 sk_node.pprev));
+			if (prot->clear_sk)
+				prot->clear_sk(sk, prot->obj_size);
+			else
+				sk_prot_clear_nulls(sk, prot->obj_size);
 		}
-	}
-	else
+	} else
 		sk = kmalloc(prot->obj_size, priority);
 
 	if (sk != NULL) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5e0a3a582a59..2d3ded4d0786 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1899,6 +1899,7 @@ struct proto udp_prot = {
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udp_prot);
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index ab76aa928fa9..aee9963f7f5a 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -57,6 +57,7 @@ struct proto 	udplite_prot = {
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91def93bec85..cd6cb7c3e563 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1477,6 +1477,7 @@ struct proto udpv6_prot = {
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5f48fadc27f7..986c4de5292e 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -55,6 +55,7 @@ struct proto udplitev6_prot = {
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udplite6_protosw = {
-- 
cgit v1.2.3


From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 1 Dec 2010 19:41:49 +0100
Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead

When stacking devices, a request_queue is not always available. This
forced us to have a no_cluster flag in the queue_limits that could be
used as a carrier until the request_queue had been set up for a
metadevice.

There were several problems with that approach. First of all it was up
to the stacking device to remember to set queue flag after stacking had
completed. Also, the queue flag and the queue limits had to be kept in
sync at all times. We got that wrong, which could lead to us issuing
commands that went beyond the max scatterlist limit set by the driver.

The proper fix is to avoid having two flags for tracking the same thing.
We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the
block layer merging functions. The queue_limit 'no_cluster' is turned
into 'cluster' to avoid double negatives and to ease stacking.
Clustering defaults to being enabled as before. The queue flag logic is
removed from the stacking function, and explicitly setting the cluster
flag is no longer necessary in DM and MD.

Reported-by: Ed Lin <ed.lin@promise.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-merge.c       |  6 +++---
 block/blk-settings.c    | 25 ++-----------------------
 block/blk-sysfs.c       |  2 +-
 drivers/md/dm-table.c   |  5 -----
 drivers/md/md.c         |  3 ---
 drivers/scsi/scsi_lib.c |  3 +--
 include/linux/blkdev.h  |  9 ++++++---
 7 files changed, 13 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 77b7c26df6b5..74bc4a768f32 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,7 +21,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 		return 0;
 
 	fbio = bio;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(blk_recount_segments);
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
-	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (!blk_queue_cluster(q))
 		return 0;
 
 	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
@@ -123,7 +123,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	int nsegs, cluster;
 
 	nsegs = 0;
-	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+	cluster = blk_queue_cluster(q);
 
 	/*
 	 * for each bio in rq
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 701859fb9647..e55f5fc4ca22 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -126,7 +126,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
-	lim->no_cluster = 0;
+	lim->cluster = 1;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
@@ -464,15 +464,6 @@ EXPORT_SYMBOL(blk_queue_io_opt);
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
 	blk_stack_limits(&t->limits, &b->limits, 0);
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(t->queue_lock, flags);
-		queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
@@ -545,7 +536,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->io_min = max(t->io_min, b->io_min);
 	t->io_opt = lcm(t->io_opt, b->io_opt);
 
-	t->no_cluster |= b->no_cluster;
+	t->cluster &= b->cluster;
 	t->discard_zeroes_data &= b->discard_zeroes_data;
 
 	/* Physical block size a multiple of the logical block size? */
@@ -641,7 +632,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		       sector_t offset)
 {
 	struct request_queue *t = disk->queue;
-	struct request_queue *b = bdev_get_queue(bdev);
 
 	if (bdev_stack_limits(&t->limits, bdev, offset >> 9) < 0) {
 		char top[BDEVNAME_SIZE], bottom[BDEVNAME_SIZE];
@@ -652,17 +642,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
 		printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
 		       top, bottom);
 	}
-
-	if (!t->queue_lock)
-		WARN_ON_ONCE(1);
-	else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(t->queue_lock, flags);
-		if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
-			queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
-		spin_unlock_irqrestore(t->queue_lock, flags);
-	}
 }
 EXPORT_SYMBOL(disk_stack_limits);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 013457f47fdc..41fb69150b4d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -119,7 +119,7 @@ static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *
 
 static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
 {
-	if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+	if (blk_queue_cluster(q))
 		return queue_var_show(queue_max_segment_size(q), (page));
 
 	return queue_var_show(PAGE_CACHE_SIZE, (page));
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 90267f8d64ee..e2da1912a2cb 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1131,11 +1131,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	 */
 	q->limits = *limits;
 
-	if (limits->no_cluster)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
-
 	if (!dm_table_supports_discards(t))
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
 	else
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84c46a161927..52694d29663d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4296,9 +4296,6 @@ static int md_alloc(dev_t dev, char *name)
 		goto abort;
 	mddev->queue->queuedata = mddev;
 
-	/* Can be unlocked because the queue is new: no concurrency */
-	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
-
 	blk_queue_make_request(mddev->queue, md_make_request);
 
 	disk = alloc_disk(1 << shift);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eafeeda6e194..9d7ba07dc5ef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1642,9 +1642,8 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
-	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
+		q->limits.cluster = 0;
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aae86fd10c4f..95aeeeb49e8b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -250,7 +250,7 @@ struct queue_limits {
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
-	unsigned char		no_cluster;
+	unsigned char		cluster;
 	signed char		discard_zeroes_data;
 };
 
@@ -380,7 +380,6 @@ struct request_queue
 #endif
 };
 
-#define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
@@ -403,7 +402,6 @@ struct request_queue
 #define QUEUE_FLAG_SECDISCARD  19	/* supports SECDISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
-				 (1 << QUEUE_FLAG_CLUSTER) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
@@ -510,6 +508,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
+static inline unsigned int blk_queue_cluster(struct request_queue *q)
+{
+	return q->limits.cluster;
+}
+
 /*
  * We regard a request as sync, if either a read or a sync write
  */
-- 
cgit v1.2.3


From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 17 Dec 2010 08:34:20 +0100
Subject: block: max hardware sectors limit wrapper

Implement blk_limits_max_hw_sectors() and make
blk_queue_max_hw_sectors() a wrapper around it.

DM needs this to avoid setting queue_limits' max_hw_sectors and
max_sectors directly.  dm_set_device_limits() now leverages
blk_limits_max_hw_sectors() logic to establish the appropriate
max_hw_sectors minimum (PAGE_SIZE).  Fixes issue where DM was
incorrectly setting max_sectors rather than max_hw_sectors (which
caused dm_merge_bvec()'s max_hw_sectors check to be ineffective).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@kernel.org
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-settings.c   | 26 ++++++++++++++++++++------
 drivers/md/dm-table.c  |  5 ++---
 include/linux/blkdev.h |  1 +
 3 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index e55f5fc4ca22..36c8c1f2af18 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -229,8 +229,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q:  the request queue for the device
+ * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
+ * @limits: the queue limits
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
 {
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
@@ -252,9 +252,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 		       __func__, max_hw_sectors);
 	}
 
-	q->limits.max_hw_sectors = max_hw_sectors;
-	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
-				      BLK_DEF_MAX_SECTORS);
+	limits->max_hw_sectors = max_hw_sectors;
+	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+				    BLK_DEF_MAX_SECTORS);
+}
+EXPORT_SYMBOL(blk_limits_max_hw_sectors);
+
+/**
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
+ * @max_hw_sectors:  max hardware sectors in the usual 512b unit
+ *
+ * Description:
+ *    See description for blk_limits_max_hw_sectors().
+ **/
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
+{
+	blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e2da1912a2cb..4d705cea0f8c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -517,9 +517,8 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 	 */
 
 	if (q->merge_bvec_fn && !ti->type->merge)
-		limits->max_sectors =
-			min_not_zero(limits->max_sectors,
-				     (unsigned int) (PAGE_SIZE >> 9));
+		blk_limits_max_hw_sectors(limits,
+					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95aeeeb49e8b..36ab42c9bb99 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -808,6 +808,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
+extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
-- 
cgit v1.2.3


From b6aa5901c7a2bd90d0b6b9866300d2648b2568f3 Mon Sep 17 00:00:00 2001
From: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Date: Wed, 15 Dec 2010 20:45:41 -0800
Subject: ceph: mark user pages dirty on direct-io reads

For read operation, we have to set the argument _write_ of get_user_pages
to 1 since we will write data to pages. Also, we need to SetPageDirty before
releasing these pages.

Signed-off-by: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/file.c               |  8 ++++----
 include/linux/ceph/libceph.h |  6 ++++--
 net/ceph/pagevec.c           | 11 +++++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e860d8f1bb45..7d0e4a82d898 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -384,7 +384,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 
 	if (file->f_flags & O_DIRECT) {
 		num_pages = calc_pages_for((unsigned long)data, len);
-		pages = ceph_get_direct_page_vector(data, num_pages);
+		pages = ceph_get_direct_page_vector(data, num_pages, true);
 	} else {
 		num_pages = calc_pages_for(off, len);
 		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
@@ -413,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 
 done:
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, true);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -522,7 +522,7 @@ more:
 		return -ENOMEM;
 
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages);
+		pages = ceph_get_direct_page_vector(data, num_pages, false);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -572,7 +572,7 @@ more:
 	}
 
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, false);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 9e76d35670d2..72c72bfccb88 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -227,8 +227,10 @@ extern int ceph_open_session(struct ceph_client *client);
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
 extern struct page **ceph_get_direct_page_vector(const char __user *data,
-						 int num_pages);
-extern void ceph_put_page_vector(struct page **pages, int num_pages);
+						 int num_pages,
+						 bool write_page);
+extern void ceph_put_page_vector(struct page **pages, int num_pages,
+				 bool dirty);
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index ac34feeb2b3a..01947a5d03b7 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -13,7 +13,7 @@
  * build a vector of user pages
  */
 struct page **ceph_get_direct_page_vector(const char __user *data,
-					  int num_pages)
+					  int num_pages, bool write_page)
 {
 	struct page **pages;
 	int rc;
@@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
 
 	down_read(&current->mm->mmap_sem);
 	rc = get_user_pages(current, current->mm, (unsigned long)data,
-			    num_pages, 0, 0, pages, NULL);
+			    num_pages, write_page, 0, pages, NULL);
 	up_read(&current->mm->mmap_sem);
 	if (rc < 0)
 		goto fail;
@@ -36,12 +36,15 @@ fail:
 }
 EXPORT_SYMBOL(ceph_get_direct_page_vector);
 
-void ceph_put_page_vector(struct page **pages, int num_pages)
+void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 {
 	int i;
 
-	for (i = 0; i < num_pages; i++)
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
+	}
 	kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
-- 
cgit v1.2.3


From c0f5ac5426f7fd82b23dd5c6a1e633b290294a08 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 16 Dec 2010 10:38:41 -0700
Subject: Revert "resources: support allocating space within a region from the
 top down"

This reverts commit e7f8567db9a7f6b3151b0b275e245c1cef0d9c70.

Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt |  5 --
 include/linux/ioport.h              |  1 -
 kernel/resource.c                   | 98 ++-----------------------------------
 3 files changed, 4 insertions(+), 100 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cdd2a6e8a3b7..8b61c9360999 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2175,11 +2175,6 @@ and is between 256 and 4096 characters. It is defined in the file
 	reset_devices	[KNL] Force drivers to reset the underlying device
 			during initialization.
 
-	resource_alloc_from_bottom
-			Allocate new resources from the beginning of available
-			space, not the end.  If you need to use this, please
-			report a bug.
-
 	resume=		[SWSUSP]
 			Specify the partition device for software suspend
 
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index d377ea815d45..b22790268b64 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -112,7 +112,6 @@ struct resource_list {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
-extern int resource_alloc_from_bottom;
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
diff --git a/kernel/resource.c b/kernel/resource.c
index 9fad33efd0db..560659f7baef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource);
 
 static DEFINE_RWLOCK(resource_lock);
 
-/*
- * By default, we allocate free space bottom-up.  The architecture can request
- * top-down by clearing this flag.  The user can override the architecture's
- * choice with the "resource_alloc_from_bottom" kernel boot option, but that
- * should only be a debugging tool.
- */
-int resource_alloc_from_bottom = 1;
-
-static __init int setup_alloc_from_bottom(char *s)
-{
-	printk(KERN_INFO
-	       "resource: allocating from bottom-up; please report a bug\n");
-	resource_alloc_from_bottom = 1;
-	return 0;
-}
-early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
-
 static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct resource *p = v;
@@ -396,75 +379,8 @@ static bool resource_contains(struct resource *res1, struct resource *res2)
 	return res1->start <= res2->start && res1->end >= res2->end;
 }
 
-/*
- * Find the resource before "child" in the sibling list of "root" children.
- */
-static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
-{
-	struct resource *this;
-
-	for (this = root->child; this; this = this->sibling)
-		if (this->sibling == child)
-			return this;
-
-	return NULL;
-}
-
-/*
- * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the end of the root resource first.
- */
-static int find_resource_from_top(struct resource *root, struct resource *new,
-				  resource_size_t size, resource_size_t min,
-				  resource_size_t max, resource_size_t align,
-				  resource_size_t (*alignf)(void *,
-						   const struct resource *,
-						   resource_size_t,
-						   resource_size_t),
-				  void *alignf_data)
-{
-	struct resource *this;
-	struct resource tmp, avail, alloc;
-
-	tmp.start = root->end;
-	tmp.end = root->end;
-
-	this = find_sibling_prev(root, NULL);
-	for (;;) {
-		if (this) {
-			if (this->end < root->end)
-				tmp.start = this->end + 1;
-		} else
-			tmp.start = root->start;
-
-		resource_clip(&tmp, min, max);
-
-		/* Check for overflow after ALIGN() */
-		avail = *new;
-		avail.start = ALIGN(tmp.start, align);
-		avail.end = tmp.end;
-		if (avail.start >= tmp.start) {
-			alloc.start = alignf(alignf_data, &avail, size, align);
-			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
-				new->start = alloc.start;
-				new->end = alloc.end;
-				return 0;
-			}
-		}
-
-		if (!this || this->start == root->start)
-			break;
-
-		tmp.end = this->start - 1;
-		this = find_sibling_prev(root, this);
-	}
-	return -EBUSY;
-}
-
 /*
  * Find empty slot in the resource tree given range and alignment.
- * This version allocates from the beginning of the root resource first.
  */
 static int find_resource(struct resource *root, struct resource *new,
 			 resource_size_t size, resource_size_t min,
@@ -480,15 +396,14 @@ static int find_resource(struct resource *root, struct resource *new,
 
 	tmp.start = root->start;
 	/*
-	 * Skip past an allocated resource that starts at 0, since the
-	 * assignment of this->start - 1 to tmp->end below would cause an
-	 * underflow.
+	 * Skip past an allocated resource that starts at 0, since the assignment
+	 * of this->start - 1 to tmp->end below would cause an underflow.
 	 */
 	if (this && this->start == 0) {
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
-	for (;;) {
+	for(;;) {
 		if (this)
 			tmp.end = this->start - 1;
 		else
@@ -509,10 +424,8 @@ static int find_resource(struct resource *root, struct resource *new,
 				return 0;
 			}
 		}
-
 		if (!this)
 			break;
-
 		tmp.start = this->end + 1;
 		this = this->sibling;
 	}
@@ -545,10 +458,7 @@ int allocate_resource(struct resource *root, struct resource *new,
 		alignf = simple_align_resource;
 
 	write_lock(&resource_lock);
-	if (resource_alloc_from_bottom)
-		err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
-	else
-		err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data);
+	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
 	if (err >= 0 && __request_resource(root, new))
 		err = -EBUSY;
 	write_unlock(&resource_lock);
-- 
cgit v1.2.3


From fcb119183c73bf0781009713f303e28b1fb13d3e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 16 Dec 2010 10:38:46 -0700
Subject: resources: add arch hook for preventing allocation in reserved areas

This adds arch_remove_reservations(), which an arch can implement if it
needs to protect part of the address space from allocation.

Sometimes that can be done by just putting a region in the resource tree,
but there are cases where that doesn't work well.  For example, x86 BIOS
E820 reservations are not related to devices, so they may overlap part of,
all of, or more than a device resource, so they may not end up at the
correct spot in the resource tree.

Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/ioport.h | 1 +
 kernel/resource.c      | 6 ++++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index b22790268b64..e9bb22cba764 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -123,6 +123,7 @@ extern void reserve_region_with_split(struct resource *root,
 extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
 extern int insert_resource(struct resource *parent, struct resource *new);
 extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
+extern void arch_remove_reservations(struct resource *avail);
 extern int allocate_resource(struct resource *root, struct resource *new,
 			     resource_size_t size, resource_size_t min,
 			     resource_size_t max, resource_size_t align,
diff --git a/kernel/resource.c b/kernel/resource.c
index 560659f7baef..798e2fae2a06 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -357,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn)
 	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
 }
 
+void __weak arch_remove_reservations(struct resource *avail)
+{
+}
+
 static resource_size_t simple_align_resource(void *data,
 					     const struct resource *avail,
 					     resource_size_t size,
@@ -394,6 +398,7 @@ static int find_resource(struct resource *root, struct resource *new,
 	struct resource *this = root->child;
 	struct resource tmp = *new, avail, alloc;
 
+	tmp.flags = new->flags;
 	tmp.start = root->start;
 	/*
 	 * Skip past an allocated resource that starts at 0, since the assignment
@@ -410,6 +415,7 @@ static int find_resource(struct resource *root, struct resource *new,
 			tmp.end = root->end;
 
 		resource_clip(&tmp, min, max);
+		arch_remove_reservations(&tmp);
 
 		/* Check for overflow after ALIGN() */
 		avail = *new;
-- 
cgit v1.2.3


From 4b8fe66300acb2fba8b16d62606e0d30204022fc Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Fri, 17 Dec 2010 12:03:14 -0800
Subject: netlink: fix gcc -Wconversion compilation warning

$ cat << EOF | gcc -Wconversion -xc -S -o/dev/null -
unsigned f(void) {return NLMSG_HDRLEN;}
EOF
<stdin>: In function 'f':
<stdin>:3:26: warning: negative integer implicitly converted to unsigned type

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 123566912d73..e2b9e63afa68 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -70,7 +70,7 @@ struct nlmsghdr {
    Check		NLM_F_EXCL
  */
 
-#define NLMSG_ALIGNTO	4
+#define NLMSG_ALIGNTO	4U
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
 #define NLMSG_HDRLEN	 ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
 #define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
-- 
cgit v1.2.3


From ad0081e43af6de3fecf308b0d098f9611835766b Mon Sep 17 00:00:00 2001
From: David Stevens <dlstevens@us.ibm.com>
Date: Fri, 17 Dec 2010 11:42:42 +0000
Subject: ipv6: Fragment locally generated tunnel-mode IPSec6 packets as
 needed.

This patch modifies IPsec6 to fragment IPv6 packets that are
locally generated as needed.

This version of the patch only fragments in tunnel mode, so that fragment
headers will not be obscured by ESP in transport mode.

Signed-off-by: David L Stevens <dlstevens@us.ibm.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 10 ++++++++++
 net/ipv6/ip6_output.c   | 12 ++----------
 net/ipv6/xfrm6_output.c | 16 +++++++++++++++-
 3 files changed, 27 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 278312c95f96..2ab926860cd8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb)
 	return rt->rt6i_flags & RTF_LOCAL;
 }
 
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+}
+
 #endif
 #endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 99157b4cd56e..94b5bf132b2e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,7 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 int __ip6_local_out(struct sk_buff *skb)
 {
@@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	return -EINVAL;
 }
 
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
-{
-	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
-
-	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
-	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
-}
-
 static int ip6_finish_output(struct sk_buff *skb)
 {
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
@@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 	return offset;
 }
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
 	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 6434bd5ce088..8e688b3de9ab 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -17,6 +17,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/xfrm.h>
 
 int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 	return xfrm_output(skb);
 }
 
+static int __xfrm6_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
+	if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
+	    ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+		dst_allfrag(skb_dst(skb)))) {
+			return ip6_fragment(skb, xfrm6_output_finish);
+	}
+	return xfrm6_output_finish(skb);
+}
+
 int xfrm6_output(struct sk_buff *skb)
 {
 	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
-		       skb_dst(skb)->dev, xfrm6_output_finish);
+		       skb_dst(skb)->dev, __xfrm6_output);
 }
-- 
cgit v1.2.3


From b8da46d3d55807037b58f14621a0949f18053bde Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Mon, 20 Dec 2010 00:29:32 -0500
Subject: clarify a usage constraint for cnt32_to_63()

The cnt32_to_63 algorithm relies on proper counter data evaluation
ordering to work properly. This was missing from the provided
documentation.

Let's augment the documentation with the missing usage constraint and
fix the only instance that got it wrong.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/kernel/time.c  | 10 +++-------
 include/linux/cnt32_to_63.h | 20 +++++++++++++++++++-
 2 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index f860a340acc9..75da468090b9 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -40,21 +40,17 @@ unsigned long long sched_clock(void)
 		unsigned long long ll;
 		unsigned l[2];
 	} tsc64, result;
-	unsigned long tsc, tmp;
+	unsigned long tmp;
 	unsigned product[3]; /* 96-bit intermediate value */
 
 	/* cnt32_to_63() is not safe with preemption */
 	preempt_disable();
 
-	/* read the TSC value
-	 */
-	tsc = get_cycles();
-
-	/* expand to 64-bits.
+	/* expand the tsc to 64-bits.
 	 * - sched_clock() must be called once a minute or better or the
 	 *   following will go horribly wrong - see cnt32_to_63()
 	 */
-	tsc64.ll = cnt32_to_63(tsc) & 0x7fffffffffffffffULL;
+	tsc64.ll = cnt32_to_63(get_cycles()) & 0x7fffffffffffffffULL;
 
 	preempt_enable();
 
diff --git a/include/linux/cnt32_to_63.h b/include/linux/cnt32_to_63.h
index 7605fdd1eb65..e3d8bf26e5eb 100644
--- a/include/linux/cnt32_to_63.h
+++ b/include/linux/cnt32_to_63.h
@@ -61,13 +61,31 @@ union cnt32_to_63 {
  *
  * 2) this code must not be preempted for a duration longer than the
  *    32-bit counter half period minus the longest period between two
- *    calls to this code.
+ *    calls to this code;
  *
  * Those requirements ensure proper update to the state bit in memory.
  * This is usually not a problem in practice, but if it is then a kernel
  * timer should be scheduled to manage for this code to be executed often
  * enough.
  *
+ * And finally:
+ *
+ * 3) the cnt_lo argument must be seen as a globally incrementing value,
+ *    meaning that it should be a direct reference to the counter data which
+ *    can be evaluated according to a specific ordering within the macro,
+ *    and not the result of a previous evaluation stored in a variable.
+ *
+ * For example, this is wrong:
+ *
+ *	u32 partial = get_hw_count();
+ *	u64 full = cnt32_to_63(partial);
+ *	return full;
+ *
+ * This is fine:
+ *
+ *	u64 full = cnt32_to_63(get_hw_count());
+ *	return full;
+ *
  * Note that the top bit (bit 63) in the returned value should be considered
  * as garbage.  It is not cleared here because callers are likely to use a
  * multiplier on the returned value which can get rid of the top bit
-- 
cgit v1.2.3


From 173021072e86a0a5b3d2271347493a3e0d5f68e8 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 20 Dec 2010 04:35:30 +0000
Subject: net_sched: always clone skbs

Pawel reported a panic related to handling shared skbs in ixgbe
incorrectly. So we need to revert my previous patch to work around
this bug. Instead of reverting the patch completely, I just revert
the essential lines, so we can add the previous optimization
back more easily in future.

    commit 3511c9132f8b1e1b5634e41a3331c44b0c13be70
    Author: Changli Gao <xiaosuo@gmail.com>
    Date:   Sat Oct 16 13:04:08 2010 +0000

        net_sched: remove the unused parameter of qdisc_create_dflt()

Reported-by: Pawel Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ea1f8a83160d..79f34e2b752f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
 {
 	struct sk_buff *n;
 
-	if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
-	    !skb_shared(skb))
-		n = skb_get(skb);
-	else
-		n = skb_clone(skb, gfp_mask);
+	n = skb_clone(skb, gfp_mask);
 
 	if (n) {
 		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
-- 
cgit v1.2.3


From 9f333281a7da4c3a59bccc0cb53f7590eb850d93 Mon Sep 17 00:00:00 2001
From: Johannes Stezenbach <js@sig21.net>
Date: Tue, 30 Nov 2010 16:49:23 +0100
Subject: mac80211/rt2x00: add ieee80211_tx_status_ni()

All rt2x00 drivers except rt2800pci call ieee80211_tx_status() from
a workqueue, which causes "NOHZ: local_softirq_pending 08" messages.

To fix it, add ieee80211_tx_status_ni() similar to ieee80211_rx_ni()
which can be called from process context, and call it from
rt2x00lib_txdone().  For the rt2800pci special case a driver
flag is introduced.

https://bugzilla.kernel.org/show_bug.cgi?id=24892

Signed-off-by: Johannes Stezenbach <js@sig21.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/rt2x00/rt2800pci.c |  1 +
 drivers/net/wireless/rt2x00/rt2x00.h    |  1 +
 drivers/net/wireless/rt2x00/rt2x00dev.c |  9 ++++++---
 include/net/mac80211.h                  | 28 ++++++++++++++++++++++++----
 4 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c
index b26739535986..09a67905c230 100644
--- a/drivers/net/wireless/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/rt2x00/rt2800pci.c
@@ -912,6 +912,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev)
 	__set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags);
 	__set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags);
 	__set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags);
+	__set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags);
 	if (!modparam_nohwcrypt)
 		__set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags);
 	__set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags);
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 94fe589acfaa..ab43e7ca2a23 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -664,6 +664,7 @@ enum rt2x00_flags {
 	DRIVER_REQUIRE_COPY_IV,
 	DRIVER_REQUIRE_L2PAD,
 	DRIVER_REQUIRE_TXSTATUS_FIFO,
+	DRIVER_REQUIRE_TASKLET_CONTEXT,
 
 	/*
 	 * Driver features
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 5ba79b935f09..d019830ca840 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -390,9 +390,12 @@ void rt2x00lib_txdone(struct queue_entry *entry,
 	 * through a mac80211 library call (RTS/CTS) then we should not
 	 * send the status report back.
 	 */
-	if (!(skbdesc_flags & SKBDESC_NOT_MAC80211))
-		ieee80211_tx_status(rt2x00dev->hw, entry->skb);
-	else
+	if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) {
+		if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags))
+			ieee80211_tx_status(rt2x00dev->hw, entry->skb);
+		else
+			ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb);
+	} else
 		dev_kfree_skb_any(entry->skb);
 
 	/*
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9fdf982d1286..365359b24177 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
  *
  * This function may not be called in IRQ context. Calls to this function
  * for a single hardware must be synchronized against each other. Calls
- * to this function and ieee80211_tx_status_irqsafe() may not be mixed
- * for a single hardware.
+ * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe()
+ * may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
@@ -2033,14 +2033,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
 void ieee80211_tx_status(struct ieee80211_hw *hw,
 			 struct sk_buff *skb);
 
+/**
+ * ieee80211_tx_status_ni - transmit status callback (in process context)
+ *
+ * Like ieee80211_tx_status() but can be called in process context.
+ *
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_irqsafe() may not be mixed
+ * for a single hardware.
+ *
+ * @hw: the hardware the frame was transmitted by
+ * @skb: the frame that was transmitted, owned by mac80211 after this call
+ */
+static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
+					  struct sk_buff *skb)
+{
+	local_bh_disable();
+	ieee80211_tx_status(hw, skb);
+	local_bh_enable();
+}
+
 /**
  * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
  *
  * Like ieee80211_tx_status() but can be called in IRQ context
  * (internally defers to a tasklet.)
  *
- * Calls to this function and ieee80211_tx_status() may not be mixed for a
- * single hardware.
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_ni() may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
-- 
cgit v1.2.3


From da521b2c4f046383bc8941604174bc0e8bffb430 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 21 Dec 2010 12:43:16 -0800
Subject: net: Fix range checks in tcf_valid_offset().

This function has three bugs:

1) The offset should be valid most of the time, this is just
   a sanity check, therefore we should use "likely" not "unlikely"

2) This is the only place where we can check for arithmetic overflow
   of the pointer plus the length.

3) The existing range checks are off by one, the valid range is
   skb->head to skb_tail_pointer(), inclusive.

Based almost entirely upon a patch by Ralph Loader.

Reported-by: Ralph Loader <suckfish@ihug.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index dd3031aed9d5..9fcc680ab6b9 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 static inline int tcf_valid_offset(const struct sk_buff *skb,
 				   const unsigned char *ptr, const int len)
 {
-	return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
+	return likely((ptr + len) <= skb_tail_pointer(skb) &&
+		      ptr >= skb->head &&
+		      (ptr <= (ptr + len)));
 }
 
 #ifdef CONFIG_NET_CLS_IND
-- 
cgit v1.2.3


From 4f32e9b1f812fd6c00cc85a127583fefbdedaedc Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Wed, 22 Dec 2010 10:27:53 +0100
Subject: kthread_work: make lockdep happy

spinlock in kthread_worker and wait_queue_head in kthread_work both
should be lockdep sensible, so change the interface to make it
suiltable for CONFIG_LOCKDEP.

tj: comment update

Reported-by: Nicolas <nicolas.mailhot@laposte.net>
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Signed-off-by: Andy Walls <awalls@md.metrocast.net>
Tested-by: Andy Walls <awalls@md.metrocast.net>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/kthread.h | 45 +++++++++++++++++++++++++++++++++++----------
 kernel/kthread.c        | 11 +++++++++++
 2 files changed, 46 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 685ea65eb803..ce0775aa64c3 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -81,16 +81,41 @@ struct kthread_work {
 #define DEFINE_KTHREAD_WORK(work, fn)					\
 	struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
 
-static inline void init_kthread_worker(struct kthread_worker *worker)
-{
-	*worker = (struct kthread_worker)KTHREAD_WORKER_INIT(*worker);
-}
-
-static inline void init_kthread_work(struct kthread_work *work,
-				     kthread_work_func_t fn)
-{
-	*work = (struct kthread_work)KTHREAD_WORK_INIT(*work, fn);
-}
+/*
+ * kthread_worker.lock and kthread_work.done need their own lockdep class
+ * keys if they are defined on stack with lockdep enabled.  Use the
+ * following macros when defining them on stack.
+ */
+#ifdef CONFIG_LOCKDEP
+# define KTHREAD_WORKER_INIT_ONSTACK(worker)				\
+	({ init_kthread_worker(&worker); worker; })
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker)				\
+	struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
+# define KTHREAD_WORK_INIT_ONSTACK(work, fn)				\
+	({ init_kthread_work((&work), fn); work; })
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn)				\
+	struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn)
+#else
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn)
+#endif
+
+extern void __init_kthread_worker(struct kthread_worker *worker,
+			const char *name, struct lock_class_key *key);
+
+#define init_kthread_worker(worker)					\
+	do {								\
+		static struct lock_class_key __key;			\
+		__init_kthread_worker((worker), "("#worker")->lock", &__key); \
+	} while (0)
+
+#define init_kthread_work(work, fn)					\
+	do {								\
+		memset((work), 0, sizeof(struct kthread_work));		\
+		INIT_LIST_HEAD(&(work)->node);				\
+		(work)->func = (fn);					\
+		init_waitqueue_head(&(work)->done);			\
+	} while (0)
 
 int kthread_worker_fn(void *worker_ptr);
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2dc3786349d1..ca61bbdd44b2 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -265,6 +265,17 @@ int kthreadd(void *unused)
 	return 0;
 }
 
+void __init_kthread_worker(struct kthread_worker *worker,
+				const char *name,
+				struct lock_class_key *key)
+{
+	spin_lock_init(&worker->lock);
+	lockdep_set_class_and_name(&worker->lock, key, name);
+	INIT_LIST_HEAD(&worker->work_list);
+	worker->task = NULL;
+}
+EXPORT_SYMBOL_GPL(__init_kthread_worker);
+
 /**
  * kthread_worker_fn - kthread function to process kthread_worker
  * @worker_ptr: pointer to initialized kthread_worker
-- 
cgit v1.2.3


From 4e06fd14d5fa78826397c891654a37e5a36ee827 Mon Sep 17 00:00:00 2001
From: Will Newton <will.newton@gmail.com>
Date: Tue, 21 Dec 2010 17:24:29 -0800
Subject: include/linux/unaligned: pack the whole struct rather than just the
 field

The current packed struct implementation of unaligned access adds the
packed attribute only to the field within the unaligned struct rather than
to the struct as a whole.  This is not sufficient to enforce proper
behaviour on architectures with a default struct alignment of more than
one byte.

For example, the current implementation of __get_unaligned_cpu16 when
compiled for arm with gcc -O1 -mstructure-size-boundary=32 assumes the
struct is on a 4 byte boundary so performs the load of the 16bit packed
field as if it were on a 4 byte boundary:

__get_unaligned_cpu16:
        ldrh    r0, [r0, #0]
        bx      lr

Moving the packed attribute to the struct rather than the field causes the
proper unaligned access code to be generated:

__get_unaligned_cpu16:
	ldrb	r3, [r0, #0]	@ zero_extendqisi2
	ldrb	r0, [r0, #1]	@ zero_extendqisi2
	orr	r0, r3, r0, asl #8
	bx	lr

Signed-off-by: Will Newton <will.newton@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/unaligned/packed_struct.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h
index 2498bb9fe002..c9a6abd972a1 100644
--- a/include/linux/unaligned/packed_struct.h
+++ b/include/linux/unaligned/packed_struct.h
@@ -3,9 +3,9 @@
 
 #include <linux/kernel.h>
 
-struct __una_u16 { u16 x __attribute__((packed)); };
-struct __una_u32 { u32 x __attribute__((packed)); };
-struct __una_u64 { u64 x __attribute__((packed)); };
+struct __una_u16 { u16 x; } __attribute__((packed));
+struct __una_u32 { u32 x; } __attribute__((packed));
+struct __una_u64 { u64 x; } __attribute__((packed));
 
 static inline u16 __get_unaligned_cpu16(const void *p)
 {
-- 
cgit v1.2.3


From 4be2c95d1f7706ca0e74499f2bd118e1cee19669 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 21 Dec 2010 17:24:30 -0800
Subject: taskstats: pad taskstats netlink response for aligment issues on ia64

The taskstats structure is internally aligned on 8 byte boundaries but the
layout of the aggregrate reply, with two NLA headers and the pid (each 4
bytes), actually force the entire structure to be unaligned.  This causes
the kernel to issue unaligned access warnings on some architectures like
ia64.  Unfortunately, some software out there doesn't properly unroll the
NLA packet and assumes that the start of the taskstats structure will
always be 20 bytes from the start of the netlink payload.  Aligning the
start of the taskstats structure breaks this software, which we don't
want.  So, for now the alignment only happens on architectures that
require it and those users will have to update to fixed versions of those
packages.  Space is reserved in the packet only when needed.  This ifdef
should be removed in several years e.g.  2012 once we can be confident
that fixed versions are installed on most systems.  We add the padding
before the aggregate since the aggregate is already a defined type.

Commit 85893120 ("delayacct: align to 8 byte boundary on 64-bit systems")
previously addressed the alignment issues by padding out the pid field.
This was supposed to be a compatible change but the circumstances
described above mean that it wasn't.  This patch backs out that change,
since it was a hack, and introduces a new NULL attribute type to provide
the padding.  Padding the response with 4 bytes avoids allocating an
aligned taskstats structure and copying it back.  Since the structure
weighs in at 328 bytes, it's too big to do it on the stack.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reported-by: Brian Rogers <brian@xyzw.org>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Guillaume Chazarain <guichaz@gmail.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/getdelays.c |  1 +
 include/linux/taskstats.h            |  3 +-
 kernel/taskstats.c                   | 57 ++++++++++++++++++++++++++++--------
 3 files changed, 47 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6de033..e9c77788a39d 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
 			default:
 				fprintf(stderr, "Unknown nla_type %d\n",
 					na->nla_type);
+			case TASKSTATS_TYPE_NULL:
 				break;
 			}
 			na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 341dddb55090..2466e550a41d 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -33,7 +33,7 @@
  */
 
 
-#define TASKSTATS_VERSION	7
+#define TASKSTATS_VERSION	8
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -188,6 +188,7 @@ enum {
 	TASKSTATS_TYPE_STATS,		/* taskstats structure */
 	TASKSTATS_TYPE_AGGR_PID,	/* contains pid + stats */
 	TASKSTATS_TYPE_AGGR_TGID,	/* contains tgid + stats */
+	TASKSTATS_TYPE_NULL,		/* contains nothing */
 	__TASKSTATS_TYPE_MAX,
 };
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index c8231fb15708..3308fd7f1b52 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
 	return ret;
 }
 
+#ifdef CONFIG_IA64
+#define TASKSTATS_NEEDS_PADDING 1
+#endif
+
 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
 	struct nlattr *na, *ret;
 	int aggr;
 
-	/* If we don't pad, we end up with alignment on a 4 byte boundary.
-	 * This causes lots of runtime warnings on systems requiring 8 byte
-	 * alignment */
-	u32 pids[2] = { pid, 0 };
-	int pid_size = ALIGN(sizeof(pid), sizeof(long));
-
 	aggr = (type == TASKSTATS_TYPE_PID)
 			? TASKSTATS_TYPE_AGGR_PID
 			: TASKSTATS_TYPE_AGGR_TGID;
 
+	/*
+	 * The taskstats structure is internally aligned on 8 byte
+	 * boundaries but the layout of the aggregrate reply, with
+	 * two NLA headers and the pid (each 4 bytes), actually
+	 * force the entire structure to be unaligned. This causes
+	 * the kernel to issue unaligned access warnings on some
+	 * architectures like ia64. Unfortunately, some software out there
+	 * doesn't properly unroll the NLA packet and assumes that the start
+	 * of the taskstats structure will always be 20 bytes from the start
+	 * of the netlink payload. Aligning the start of the taskstats
+	 * structure breaks this software, which we don't want. So, for now
+	 * the alignment only happens on architectures that require it
+	 * and those users will have to update to fixed versions of those
+	 * packages. Space is reserved in the packet only when needed.
+	 * This ifdef should be removed in several years e.g. 2012 once
+	 * we can be confident that fixed versions are installed on most
+	 * systems. We add the padding before the aggregate since the
+	 * aggregate is already a defined type.
+	 */
+#ifdef TASKSTATS_NEEDS_PADDING
+	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
+		goto err;
+#endif
 	na = nla_nest_start(skb, aggr);
 	if (!na)
 		goto err;
-	if (nla_put(skb, type, pid_size, pids) < 0)
+
+	if (nla_put(skb, type, sizeof(pid), &pid) < 0)
 		goto err;
 	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
 	if (!ret)
@@ -456,6 +478,18 @@ out:
 	return rc;
 }
 
+static size_t taskstats_packet_size(void)
+{
+	size_t size;
+
+	size = nla_total_size(sizeof(u32)) +
+		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+#ifdef TASKSTATS_NEEDS_PADDING
+	size += nla_total_size(0); /* Padding for alignment */
+#endif
+	return size;
+}
+
 static int cmd_attr_pid(struct genl_info *info)
 {
 	struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
 	u32 pid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
 	u32 tgid;
 	int rc;
 
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
 	if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
 	/*
 	 * Size includes space for nested attributes
 	 */
-	size = nla_total_size(sizeof(u32)) +
-		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+	size = taskstats_packet_size();
 
 	is_thread_group = !!taskstats_tgid_alloc(tsk);
 	if (is_thread_group) {
-- 
cgit v1.2.3


From e058464990c2ef1f3ecd6b83a154913c3c06f02a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 23 Dec 2010 12:03:57 -0800
Subject: Revert "ipv4: Allow configuring subnets as local addresses"

This reverts commit 4465b469008bc03b98a1b8df4e9ae501b6c69d4b.

Conflicts:

	net/ipv4/fib_frontend.c

As reported by Ben Greear, this causes regressions:

> Change 4465b469008bc03b98a1b8df4e9ae501b6c69d4b caused rules
> to stop matching the input device properly because the
> FLOWI_FLAG_MATCH_ANY_IIF is always defined in ip_dev_find().
>
> This breaks rules such as:
>
> ip rule add pref 512 lookup local
> ip rule del pref 0 lookup local
> ip link set eth2 up
> ip -4 addr add 172.16.0.102/24 broadcast 172.16.0.255 dev eth2
> ip rule add to 172.16.0.102 iif eth2 lookup local pref 10
> ip rule add iif eth2 lookup 10001 pref 20
> ip route add 172.16.0.0/24 dev eth2 table 10001
> ip route add unreachable 0/0 table 10001
>
> If you had a second interface 'eth0' that was on a different
> subnet, pinging a system on that interface would fail:
>
>   [root@ct503-60 ~]# ping 192.168.100.1
>   connect: Invalid argument

Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h      |  1 -
 net/core/fib_rules.c    |  3 +--
 net/ipv4/fib_frontend.c | 10 ++++++++--
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/flow.h b/include/net/flow.h
index 0ac3fb5e0973..bb08692a20b0 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -49,7 +49,6 @@ struct flowi {
 	__u8	proto;
 	__u8	flags;
 #define FLOWI_FLAG_ANYSRC 0x01
-#define FLOWI_FLAG_MATCH_ANY_IIF 0x02
 	union {
 		struct {
 			__be16	sport;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 82a4369ae150..a20e5d3bbfa0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->iifindex && (rule->iifindex != fl->iif) &&
-	    !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
+	if (rule->iifindex && (rule->iifindex != fl->iif))
 		goto out;
 
 	if (rule->oifindex && (rule->oifindex != fl->oif))
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27a..c19c1f739fba 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 				.daddr = addr
 			}
 		},
-		.flags = FLOWI_FLAG_MATCH_ANY_IIF
 	};
 	struct fib_result res = { 0 };
 	struct net_device *dev = NULL;
+	struct fib_table *local_table;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
 
 	rcu_read_lock();
-	if (fib_lookup(net, &fl, &res)) {
+	local_table = fib_get_table(net, RT_TABLE_LOCAL);
+	if (!local_table ||
+	    fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
 		rcu_read_unlock();
 		return NULL;
 	}
-- 
cgit v1.2.3


From 8f33d5277fada0291ea495f7fd44a3e7b7aa41d3 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 22 Dec 2010 14:46:46 +0100
Subject: dmaengine: provide dummy functions for DMA_ENGINE=n

This lets drivers, optionally using the dmaengine, build with DMA_ENGINE
unselected.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 9d8688b92d8b..8cd00ad98d37 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -824,6 +824,8 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 #ifdef CONFIG_DMA_ENGINE
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
+void dma_release_channel(struct dma_chan *chan);
 #else
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
@@ -831,7 +833,14 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 }
 static inline void dma_issue_pending_all(void)
 {
-	do { } while (0);
+}
+static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
+					      dma_filter_fn fn, void *fn_param)
+{
+	return NULL;
+}
+static inline void dma_release_channel(struct dma_chan *chan)
+{
 }
 #endif
 
@@ -842,8 +851,6 @@ void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
-void dma_release_channel(struct dma_chan *chan);
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.3


From 46e67acd5d4cacda758e871eebd15cef4e2c2665 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 3 Jan 2011 09:09:56 -0200
Subject: [media] wm8775: Revert changeset fcb9757333 to avoid a regression

It seems that cx88 and ivtv use wm8775 on some different modes. The
patch that added support for a board with wm8775 broke ivtv boards with
this device. As we're too close to release 2.6.37, let's just revert
it.

Reported-by: Andy Walls <awalls@md.metrocast.net>
Reported-by: Eric Sharkey <eric@lisaneric.org>
Reported-by: Auric <auric@aanet.com.au>
Reported by: David Gesswein <djg@pdp8online.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/cx88/cx88-alsa.c  |  99 ++++----------------------------
 drivers/media/video/cx88/cx88-cards.c |   7 +++
 drivers/media/video/cx88/cx88-video.c |  27 +--------
 drivers/media/video/cx88/cx88.h       |   6 +-
 drivers/media/video/wm8775.c          | 104 +++++++++++++---------------------
 include/media/wm8775.h                |   3 -
 6 files changed, 61 insertions(+), 185 deletions(-)

(limited to 'include')

diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 4aaa47c0eabf..54b7fcd469a8 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -40,7 +40,6 @@
 #include <sound/control.h>
 #include <sound/initval.h>
 #include <sound/tlv.h>
-#include <media/wm8775.h>
 
 #include "cx88.h"
 #include "cx88-reg.h"
@@ -587,47 +586,26 @@ static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
 	int left, right, v, b;
 	int changed = 0;
 	u32 old;
-	struct v4l2_control client_ctl;
-
-	/* Pass volume & balance onto any WM8775 */
-	if (value->value.integer.value[0] >= value->value.integer.value[1]) {
-		v = value->value.integer.value[0] << 10;
-		b = value->value.integer.value[0] ?
-			(0x8000 * value->value.integer.value[1]) / value->value.integer.value[0] :
-			0x8000;
-	} else {
-		v = value->value.integer.value[1] << 10;
-		b = value->value.integer.value[1] ?
-		0xffff - (0x8000 * value->value.integer.value[0]) / value->value.integer.value[1] :
-		0x8000;
-	}
-	client_ctl.value = v;
-	client_ctl.id = V4L2_CID_AUDIO_VOLUME;
-	call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-	client_ctl.value = b;
-	client_ctl.id = V4L2_CID_AUDIO_BALANCE;
-	call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
 
 	left = value->value.integer.value[0] & 0x3f;
 	right = value->value.integer.value[1] & 0x3f;
 	b = right - left;
 	if (b < 0) {
-		v = 0x3f - left;
-		b = (-b) | 0x40;
+	    v = 0x3f - left;
+	    b = (-b) | 0x40;
 	} else {
-		v = 0x3f - right;
+	    v = 0x3f - right;
 	}
 	/* Do we really know this will always be called with IRQs on? */
 	spin_lock_irq(&chip->reg_lock);
 	old = cx_read(AUD_VOL_CTL);
 	if (v != (old & 0x3f)) {
-		cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, (old & ~0x3f) | v);
-		changed = 1;
+	    cx_write(AUD_VOL_CTL, (old & ~0x3f) | v);
+	    changed = 1;
 	}
-	if ((cx_read(AUD_BAL_CTL) & 0x7f) != b) {
-		cx_write(AUD_BAL_CTL, b);
-		changed = 1;
+	if (cx_read(AUD_BAL_CTL) != b) {
+	    cx_write(AUD_BAL_CTL, b);
+	    changed = 1;
 	}
 	spin_unlock_irq(&chip->reg_lock);
 
@@ -640,7 +618,7 @@ static const struct snd_kcontrol_new snd_cx88_volume = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
 		  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
-	.name = "Analog-TV Volume",
+	.name = "Playback Volume",
 	.info = snd_cx88_volume_info,
 	.get = snd_cx88_volume_get,
 	.put = snd_cx88_volume_put,
@@ -671,14 +649,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
 	vol = cx_read(AUD_VOL_CTL);
 	if (value->value.integer.value[0] != !(vol & bit)) {
 		vol ^= bit;
-		cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, vol);
-		/* Pass mute onto any WM8775 */
-		if ((1<<6) == bit) {
-			struct v4l2_control client_ctl;
-			client_ctl.value = 0 != (vol & bit);
-			client_ctl.id = V4L2_CID_AUDIO_MUTE;
-			call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-		}
+		cx_write(AUD_VOL_CTL, vol);
 		ret = 1;
 	}
 	spin_unlock_irq(&chip->reg_lock);
@@ -687,7 +658,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
 
 static const struct snd_kcontrol_new snd_cx88_dac_switch = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-	.name = "Audio-Out Switch",
+	.name = "Playback Switch",
 	.info = snd_ctl_boolean_mono_info,
 	.get = snd_cx88_switch_get,
 	.put = snd_cx88_switch_put,
@@ -696,49 +667,13 @@ static const struct snd_kcontrol_new snd_cx88_dac_switch = {
 
 static const struct snd_kcontrol_new snd_cx88_source_switch = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-	.name = "Analog-TV Switch",
+	.name = "Capture Switch",
 	.info = snd_ctl_boolean_mono_info,
 	.get = snd_cx88_switch_get,
 	.put = snd_cx88_switch_put,
 	.private_value = (1<<6),
 };
 
-static int snd_cx88_alc_get(struct snd_kcontrol *kcontrol,
-			       struct snd_ctl_elem_value *value)
-{
-	snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-	struct cx88_core *core = chip->core;
-	struct v4l2_control client_ctl;
-
-	client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-	call_hw(core, WM8775_GID, core, g_ctrl, &client_ctl);
-	value->value.integer.value[0] = client_ctl.value ? 1 : 0;
-
-	return 0;
-}
-
-static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol,
-				       struct snd_ctl_elem_value *value)
-{
-	snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-	struct cx88_core *core = chip->core;
-	struct v4l2_control client_ctl;
-
-	client_ctl.value = 0 != value->value.integer.value[0];
-	client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-	call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-	return 0;
-}
-
-static struct snd_kcontrol_new snd_cx88_alc_switch = {
-	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-	.name = "Line-In ALC Switch",
-	.info = snd_ctl_boolean_mono_info,
-	.get = snd_cx88_alc_get,
-	.put = snd_cx88_alc_put,
-};
-
 /****************************************************************************
 			Basic Flow for Sound Devices
  ****************************************************************************/
@@ -860,7 +795,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
 {
 	struct snd_card  *card;
 	snd_cx88_card_t  *chip;
-	struct v4l2_subdev *sd;
 	int              err;
 
 	if (devno >= SNDRV_CARDS)
@@ -896,15 +830,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
 	if (err < 0)
 		goto error;
 
-	/* If there's a wm8775 then add a Line-In ALC switch */
-	list_for_each_entry(sd, &chip->core->v4l2_dev.subdevs, list) {
-		if (WM8775_GID == sd->grp_id) {
-			snd_ctl_add(card, snd_ctl_new1(&snd_cx88_alc_switch,
-						       chip));
-			break;
-		}
-	}
-
 	strcpy (card->driver, "CX88x");
 	sprintf(card->shortname, "Conexant CX%x", pci->device);
 	sprintf(card->longname, "%s at %#llx",
diff --git a/drivers/media/video/cx88/cx88-cards.c b/drivers/media/video/cx88/cx88-cards.c
index 9b9e169cce90..0ccc2afd7266 100644
--- a/drivers/media/video/cx88/cx88-cards.c
+++ b/drivers/media/video/cx88/cx88-cards.c
@@ -1007,15 +1007,22 @@ static const struct cx88_board cx88_boards[] = {
 		.radio_type	= UNSET,
 		.tuner_addr	= ADDR_UNSET,
 		.radio_addr	= ADDR_UNSET,
+		.audio_chip = V4L2_IDENT_WM8775,
 		.input		= {{
 			.type	= CX88_VMUX_DVB,
 			.vmux	= 0,
+			/* 2: Line-In */
+			.audioroute = 2,
 		},{
 			.type	= CX88_VMUX_COMPOSITE1,
 			.vmux	= 1,
+			/* 2: Line-In */
+			.audioroute = 2,
 		},{
 			.type	= CX88_VMUX_SVIDEO,
 			.vmux	= 2,
+			/* 2: Line-In */
+			.audioroute = 2,
 		}},
 		.mpeg           = CX88_MPEG_DVB,
 	},
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index 62cea9549404..d9249e5a04c9 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -40,7 +40,6 @@
 #include "cx88.h"
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards");
 MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
@@ -977,7 +976,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
 	const struct cx88_ctrl *c = NULL;
 	u32 value,mask;
 	int i;
-	struct v4l2_control client_ctl;
 
 	for (i = 0; i < CX8800_CTLS; i++) {
 		if (cx8800_ctls[i].v.id == ctl->id) {
@@ -991,27 +989,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
 		ctl->value = c->v.minimum;
 	if (ctl->value > c->v.maximum)
 		ctl->value = c->v.maximum;
-
-	/* Pass changes onto any WM8775 */
-	client_ctl.id = ctl->id;
-	switch (ctl->id) {
-	case V4L2_CID_AUDIO_MUTE:
-		client_ctl.value = ctl->value;
-		break;
-	case V4L2_CID_AUDIO_VOLUME:
-		client_ctl.value = (ctl->value) ?
-			(0x90 + ctl->value) << 8 : 0;
-		break;
-	case V4L2_CID_AUDIO_BALANCE:
-		client_ctl.value = ctl->value << 9;
-		break;
-	default:
-		client_ctl.id = 0;
-		break;
-	}
-	if (client_ctl.id)
-		call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
 	mask=c->mask;
 	switch (ctl->id) {
 	case V4L2_CID_AUDIO_BALANCE:
@@ -1558,9 +1535,7 @@ static int radio_queryctrl (struct file *file, void *priv,
 	if (c->id <  V4L2_CID_BASE ||
 		c->id >= V4L2_CID_LASTP1)
 		return -EINVAL;
-	if (c->id == V4L2_CID_AUDIO_MUTE ||
-		c->id == V4L2_CID_AUDIO_VOLUME ||
-		c->id == V4L2_CID_AUDIO_BALANCE) {
+	if (c->id == V4L2_CID_AUDIO_MUTE) {
 		for (i = 0; i < CX8800_CTLS; i++) {
 			if (cx8800_ctls[i].v.id == c->id)
 				break;
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index e8c732e7ae4f..c9981e77416a 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -398,19 +398,17 @@ static inline struct cx88_core *to_core(struct v4l2_device *v4l2_dev)
 	return container_of(v4l2_dev, struct cx88_core, v4l2_dev);
 }
 
-#define call_hw(core, grpid, o, f, args...) \
+#define call_all(core, o, f, args...) 				\
 	do {							\
 		if (!core->i2c_rc) {				\
 			if (core->gate_ctrl)			\
 				core->gate_ctrl(core, 1);	\
-			v4l2_device_call_all(&core->v4l2_dev, grpid, o, f, ##args); \
+			v4l2_device_call_all(&core->v4l2_dev, 0, o, f, ##args); \
 			if (core->gate_ctrl)			\
 				core->gate_ctrl(core, 0);	\
 		}						\
 	} while (0)
 
-#define call_all(core, o, f, args...) call_hw(core, 0, o, f, ##args)
-
 struct cx8800_dev;
 struct cx8802_dev;
 
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index 135525649086..fe8ef6419f83 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -35,7 +35,6 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-chip-ident.h>
 #include <media/v4l2-ctrls.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("wm8775 driver");
 MODULE_AUTHOR("Ulf Eklund, Hans Verkuil");
@@ -51,16 +50,10 @@ enum {
 	TOT_REGS
 };
 
-#define ALC_HOLD 0x85 /* R17: use zero cross detection, ALC hold time 42.6 ms */
-#define ALC_EN 0x100  /* R17: ALC enable */
-
 struct wm8775_state {
 	struct v4l2_subdev sd;
 	struct v4l2_ctrl_handler hdl;
 	struct v4l2_ctrl *mute;
-	struct v4l2_ctrl *vol;
-	struct v4l2_ctrl *bal;
-	struct v4l2_ctrl *loud;
 	u8 input;		/* Last selected input (0-0xf) */
 };
 
@@ -92,30 +85,6 @@ static int wm8775_write(struct v4l2_subdev *sd, int reg, u16 val)
 	return -1;
 }
 
-static void wm8775_set_audio(struct v4l2_subdev *sd, int quietly)
-{
-	struct wm8775_state *state = to_state(sd);
-	u8 vol_l, vol_r;
-	int muted = 0 != state->mute->val;
-	u16 volume = (u16)state->vol->val;
-	u16 balance = (u16)state->bal->val;
-
-	/* normalize ( 65535 to 0 -> 255 to 0 (+24dB to -103dB) ) */
-	vol_l = (min(65536 - balance, 32768) * volume) >> 23;
-	vol_r = (min(balance, (u16)32768) * volume) >> 23;
-
-	/* Mute */
-	if (muted || quietly)
-		wm8775_write(sd, R21, 0x0c0 | state->input);
-
-	wm8775_write(sd, R14, vol_l | 0x100); /* 0x100= Left channel ADC zero cross enable */
-	wm8775_write(sd, R15, vol_r | 0x100); /* 0x100= Right channel ADC zero cross enable */
-
-	/* Un-mute */
-	if (!muted)
-		wm8775_write(sd, R21, state->input);
-}
-
 static int wm8775_s_routing(struct v4l2_subdev *sd,
 			    u32 input, u32 output, u32 config)
 {
@@ -133,26 +102,25 @@ static int wm8775_s_routing(struct v4l2_subdev *sd,
 	state->input = input;
 	if (!v4l2_ctrl_g_ctrl(state->mute))
 		return 0;
-	if (!v4l2_ctrl_g_ctrl(state->vol))
-		return 0;
-	if (!v4l2_ctrl_g_ctrl(state->bal))
-		return 0;
-	wm8775_set_audio(sd, 1);
+	wm8775_write(sd, R21, 0x0c0);
+	wm8775_write(sd, R14, 0x1d4);
+	wm8775_write(sd, R15, 0x1d4);
+	wm8775_write(sd, R21, 0x100 + state->input);
 	return 0;
 }
 
 static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl)
 {
 	struct v4l2_subdev *sd = to_sd(ctrl);
+	struct wm8775_state *state = to_state(sd);
 
 	switch (ctrl->id) {
 	case V4L2_CID_AUDIO_MUTE:
-	case V4L2_CID_AUDIO_VOLUME:
-	case V4L2_CID_AUDIO_BALANCE:
-		wm8775_set_audio(sd, 0);
-		return 0;
-	case V4L2_CID_AUDIO_LOUDNESS:
-		wm8775_write(sd, R17, (ctrl->val ? ALC_EN : 0) | ALC_HOLD);
+		wm8775_write(sd, R21, 0x0c0);
+		wm8775_write(sd, R14, 0x1d4);
+		wm8775_write(sd, R15, 0x1d4);
+		if (!ctrl->val)
+			wm8775_write(sd, R21, 0x100 + state->input);
 		return 0;
 	}
 	return -EINVAL;
@@ -176,7 +144,16 @@ static int wm8775_log_status(struct v4l2_subdev *sd)
 
 static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq)
 {
-	wm8775_set_audio(sd, 0);
+	struct wm8775_state *state = to_state(sd);
+
+	/* If I remove this, then it can happen that I have no
+	   sound the first time I tune from static to a valid channel.
+	   It's difficult to reproduce and is almost certainly related
+	   to the zero cross detect circuit. */
+	wm8775_write(sd, R21, 0x0c0);
+	wm8775_write(sd, R14, 0x1d4);
+	wm8775_write(sd, R15, 0x1d4);
+	wm8775_write(sd, R21, 0x100 + state->input);
 	return 0;
 }
 
@@ -226,7 +203,6 @@ static int wm8775_probe(struct i2c_client *client,
 {
 	struct wm8775_state *state;
 	struct v4l2_subdev *sd;
-	int err;
 
 	/* Check if the adapter supports the needed features */
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
@@ -240,21 +216,15 @@ static int wm8775_probe(struct i2c_client *client,
 		return -ENOMEM;
 	sd = &state->sd;
 	v4l2_i2c_subdev_init(sd, client, &wm8775_ops);
-	sd->grp_id = WM8775_GID; /* subdev group id */
 	state->input = 2;
 
-	v4l2_ctrl_handler_init(&state->hdl, 4);
+	v4l2_ctrl_handler_init(&state->hdl, 1);
 	state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
 			V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0);
-	state->vol = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-			V4L2_CID_AUDIO_VOLUME, 0, 65535, (65535+99)/100, 0xCF00); /* 0dB*/
-	state->bal = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-			V4L2_CID_AUDIO_BALANCE, 0, 65535, (65535+99)/100, 32768);
-	state->loud = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-			V4L2_CID_AUDIO_LOUDNESS, 0, 1, 1, 1);
 	sd->ctrl_handler = &state->hdl;
-	err = state->hdl.error;
-	if (err) {
+	if (state->hdl.error) {
+		int err = state->hdl.error;
+
 		v4l2_ctrl_handler_free(&state->hdl);
 		kfree(state);
 		return err;
@@ -266,25 +236,29 @@ static int wm8775_probe(struct i2c_client *client,
 	wm8775_write(sd, R23, 0x000);
 	/* Disable zero cross detect timeout */
 	wm8775_write(sd, R7, 0x000);
-	/* HPF enable, I2S mode, 24-bit */
-	wm8775_write(sd, R11, 0x022);
+	/* Left justified, 24-bit mode */
+	wm8775_write(sd, R11, 0x021);
 	/* Master mode, clock ratio 256fs */
 	wm8775_write(sd, R12, 0x102);
 	/* Powered up */
 	wm8775_write(sd, R13, 0x000);
-	/* ALC stereo, ALC target level -5dB FS, ALC max gain +8dB */
-	wm8775_write(sd, R16, 0x1bb);
-	/* Set ALC mode and hold time */
-	wm8775_write(sd, R17, (state->loud->val ? ALC_EN : 0) | ALC_HOLD);
+	/* ADC gain +2.5dB, enable zero cross */
+	wm8775_write(sd, R14, 0x1d4);
+	/* ADC gain +2.5dB, enable zero cross */
+	wm8775_write(sd, R15, 0x1d4);
+	/* ALC Stereo, ALC target level -1dB FS max gain +8dB */
+	wm8775_write(sd, R16, 0x1bf);
+	/* Enable gain control, use zero cross detection,
+	   ALC hold time 42.6 ms */
+	wm8775_write(sd, R17, 0x185);
 	/* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */
 	wm8775_write(sd, R18, 0x0a2);
 	/* Enable noise gate, threshold -72dBfs */
 	wm8775_write(sd, R19, 0x005);
-	/* Transient window 4ms, ALC min gain -5dB  */
-	wm8775_write(sd, R20, 0x0fb);
-
-	wm8775_set_audio(sd, 1);      /* set volume/mute/mux */
-
+	/* Transient window 4ms, lower PGA gain limit -1dB */
+	wm8775_write(sd, R20, 0x07a);
+	/* LRBOTH = 1, use input 2. */
+	wm8775_write(sd, R21, 0x102);
 	return 0;
 }
 
diff --git a/include/media/wm8775.h b/include/media/wm8775.h
index a1c4d417dfa2..60739c5a23ae 100644
--- a/include/media/wm8775.h
+++ b/include/media/wm8775.h
@@ -32,7 +32,4 @@
 #define WM8775_AIN3 4
 #define WM8775_AIN4 8
 
-/* subdev group ID */
-#define WM8775_GID (1 << 0)
-
 #endif
-- 
cgit v1.2.3