From 446411e18b2cb17d153e45f634a3c9a79ada3ac2 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 13 Feb 2013 14:20:25 +0100 Subject: spi: Initialize cs_gpio and cs_gpios with -ENOENT The return value from of_get_named_gpio is -ENOENT when the given index matches a hole in the "cs-gpios" property phandle list. However, the default value of cs_gpio in struct spi_device and entries of cs_gpios in struct spi_master is -EINVAL, which is documented to indicate that a GPIO line should not be used for the given spi_device. This sets the default value of cs_gpio in struct spi_device and entries of cs_gpios in struct spi_master to -ENOENT. Thus, -ENOENT is the only value used to indicate that no GPIO line should be used. Signed-off-by: Andreas Larsson Signed-off-by: Grant Likely --- include/linux/spi/spi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38c2b925923d..c395f32b53b3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -57,7 +57,7 @@ extern struct bus_type spi_bus_type; * @modalias: Name of the driver to use with this device, or an alias * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging - * @cs_gpio: gpio number of the chipselect line (optional, -EINVAL when + * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when * when not using a GPIO line) * * A @spi_device is used to interchange data between an SPI slave @@ -261,7 +261,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS - * number. Any individual value may be -EINVAL for CS lines that + * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). * * Each SPI master controller can communicate with one or more @spi_device -- cgit v1.2.3 From e30b5dca15dea86aa697f9d58ff646294fe80d3d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 3 May 2013 02:15:52 -0400 Subject: ext4: fix fio regression We (Linux Kernel Performance project) found a regression introduced by commit: f7fec032aa ext4: track all extent status in extent status tree The commit causes about 20% performance decrease in fio random write test. Profiler shows that rb_next() uses a lot of CPU time. The call stack is: rb_next ext4_es_find_delayed_extent ext4_map_blocks _ext4_get_block ext4_get_block_write __blockdev_direct_IO ext4_direct_IO generic_file_direct_write __generic_file_aio_write ext4_file_write aio_rw_vect_retry aio_run_iocb do_io_submit sys_io_submit system_call_fastpath io_submit td_io_getevents io_u_queued_complete thread_main main __libc_start_main The cause is that ext4_es_find_delayed_extent() doesn't have an upper bound, it keeps searching until a delayed extent is found. When there are a lots of non-delayed entries in the extent state tree, ext4_es_find_delayed_extent() may uses a lot of CPU time. Reported-by: LKP project Signed-off-by: Yan, Zheng Signed-off-by: Zheng Liu Cc: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++---- fs/ext4/extents_status.c | 17 ++++++++++++----- fs/ext4/extents_status.h | 3 ++- fs/ext4/file.c | 4 ++-- include/trace/events/ext4.h | 4 ++-- 5 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 107936db244e..bc0f1910b9cf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode, { struct extent_status es; - ext4_es_find_delayed_extent(inode, lblk_start, &es); + ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es); if (es.es_len == 0) return 0; /* there is no delay extent in this tree */ else if (es.es_lblk <= lblk_start && @@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode, struct extent_status es; ext4_lblk_t block, next_del; - ext4_es_find_delayed_extent(inode, newes->es_lblk, &es); - if (newes->es_pblk == 0) { + ext4_es_find_delayed_extent_range(inode, newes->es_lblk, + newes->es_lblk + newes->es_len - 1, &es); + /* * No extent in extent-tree contains block @newes->es_pblk, * then the block may stay in 1)a hole or 2)delayed-extent. @@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode, } block = newes->es_lblk + newes->es_len; - ext4_es_find_delayed_extent(inode, block, &es); + ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es); if (es.es_len == 0) next_del = EXT_MAX_BLOCKS; else diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index fe3337a85ede..e6941e622d31 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root, } /* - * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk - * if it exists, otherwise, the next extent after @es->lblk. + * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering + * @es->lblk if it exists, otherwise, the next extent after @es->lblk. * * @inode: the inode which owns delayed extents * @lblk: the offset where we start to search + * @end: the offset where we stop to search * @es: delayed extent that we found */ -void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, +void ext4_es_find_delayed_extent_range(struct inode *inode, + ext4_lblk_t lblk, ext4_lblk_t end, struct extent_status *es) { struct ext4_es_tree *tree = NULL; @@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, struct rb_node *node; BUG_ON(es == NULL); - trace_ext4_es_find_delayed_extent_enter(inode, lblk); + BUG_ON(end < lblk); + trace_ext4_es_find_delayed_extent_range_enter(inode, lblk); read_lock(&EXT4_I(inode)->i_es_lock); tree = &EXT4_I(inode)->i_es_tree; @@ -270,6 +273,10 @@ out: if (es1 && !ext4_es_is_delayed(es1)) { while ((node = rb_next(&es1->rb_node)) != NULL) { es1 = rb_entry(node, struct extent_status, rb_node); + if (es1->es_lblk > end) { + es1 = NULL; + break; + } if (ext4_es_is_delayed(es1)) break; } @@ -285,7 +292,7 @@ out: read_unlock(&EXT4_I(inode)->i_es_lock); ext4_es_lru_add(inode); - trace_ext4_es_find_delayed_extent_exit(inode, es); + trace_ext4_es_find_delayed_extent_range_exit(inode, es); } static struct extent_status * diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index d8e2d4dc311e..f740eb03b707 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, unsigned long long status); extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len); -extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, +extern void ext4_es_find_delayed_extent_range(struct inode *inode, + ext4_lblk_t lblk, ext4_lblk_t end, struct extent_status *es); extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 64848b595b24..1bc4523541d6 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -464,7 +464,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) * If there is a delay extent at this offset, * it will be as a data. */ - ext4_es_find_delayed_extent(inode, last, &es); + ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { if (last != start) dataoff = last << blkbits; @@ -547,7 +547,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) * If there is a delay extent at this offset, * we will skip this extent. */ - ext4_es_find_delayed_extent(inode, last, &es); + ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { last = es.es_lblk + es.es_len; holeoff = last << blkbits; diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d0e686402df8..8ee15b97cd38 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2139,7 +2139,7 @@ TRACE_EVENT(ext4_es_remove_extent, __entry->lblk, __entry->len) ); -TRACE_EVENT(ext4_es_find_delayed_extent_enter, +TRACE_EVENT(ext4_es_find_delayed_extent_range_enter, TP_PROTO(struct inode *inode, ext4_lblk_t lblk), TP_ARGS(inode, lblk), @@ -2161,7 +2161,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_enter, (unsigned long) __entry->ino, __entry->lblk) ); -TRACE_EVENT(ext4_es_find_delayed_extent_exit, +TRACE_EVENT(ext4_es_find_delayed_extent_range_exit, TP_PROTO(struct inode *inode, struct extent_status *es), TP_ARGS(inode, es), -- cgit v1.2.3 From 54afc1214133deddd49162b101971af5711f1b3a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 May 2013 16:38:33 +0300 Subject: drm: Add kernel-doc for drm_fb_helper_funcs->initial_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Signed-off-by: Dave Airlie --- include/drm/drm_fb_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 8230b46fdd73..61ebd51f6112 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -57,6 +57,7 @@ struct drm_fb_helper_surface_size { * @fb_probe: - Driver callback to allocate and initialize the fbdev info * structure. Futhermore it also needs to allocate the drm * framebuffer used to back the fbdev. + * @initial_config: - Setup an initial fbdev display configuration * * Driver callbacks used by the fbdev emulation helper library. */ -- cgit v1.2.3 From 7b97936f298af7991ca4717a4c23610d9ecd8927 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 May 2013 16:38:34 +0300 Subject: drm: Remove pointless '-' characters from drm_fb_helper documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ville Syrjälä Signed-off-by: Dave Airlie --- include/drm/drm_fb_helper.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 61ebd51f6112..471f276ce8f7 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -50,14 +50,14 @@ struct drm_fb_helper_surface_size { /** * struct drm_fb_helper_funcs - driver callbacks for the fbdev emulation library - * @gamma_set: - Set the given gamma lut register on the given crtc. - * @gamma_get: - Read the given gamma lut register on the given crtc, used to - * save the current lut when force-restoring the fbdev for e.g. - * kdbg. - * @fb_probe: - Driver callback to allocate and initialize the fbdev info - * structure. Futhermore it also needs to allocate the drm - * framebuffer used to back the fbdev. - * @initial_config: - Setup an initial fbdev display configuration + * @gamma_set: Set the given gamma lut register on the given crtc. + * @gamma_get: Read the given gamma lut register on the given crtc, used to + * save the current lut when force-restoring the fbdev for e.g. + * kdbg. + * @fb_probe: Driver callback to allocate and initialize the fbdev info + * structure. Futhermore it also needs to allocate the drm + * framebuffer used to back the fbdev. + * @initial_config: Setup an initial fbdev display configuration * * Driver callbacks used by the fbdev emulation helper library. */ -- cgit v1.2.3 From b9434d0f162c351e02e0e8f66ca6b75a67595537 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 9 May 2013 14:20:40 +0100 Subject: drm: Use names of ioctls in debug traces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intention here is to make the output of dmesg with full verbosity a bit easier for a human to parse. This commit transforms: [drm:drm_ioctl], pid=699, cmd=0x6458, nr=0x58, dev 0xe200, auth=1 [drm:drm_ioctl], pid=699, cmd=0xc010645b, nr=0x5b, dev 0xe200, auth=1 [drm:drm_ioctl], pid=699, cmd=0xc0106461, nr=0x61, dev 0xe200, auth=1 [drm:drm_ioctl], pid=699, cmd=0xc01c64ae, nr=0xae, dev 0xe200, auth=1 [drm:drm_mode_addfb], [FB:32] [drm:drm_ioctl], pid=699, cmd=0xc0106464, nr=0x64, dev 0xe200, auth=1 [drm:drm_vm_open_locked], 0x7fd9302fe000,0x00a00000 [drm:drm_ioctl], pid=699, cmd=0x400c645f, nr=0x5f, dev 0xe200, auth=1 [drm:drm_ioctl], pid=699, cmd=0xc00464af, nr=0xaf, dev 0xe200, auth=1 [drm:intel_crtc_set_config], [CRTC:3] [NOFB] into: [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, I915_GEM_THROTTLE [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, I915_GEM_CREATE [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, I915_GEM_SET_TILING [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, IOCTL_MODE_ADDFB [drm:drm_mode_addfb], [FB:32] [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, I915_GEM_MMAP_GTT [drm:drm_vm_open_locked], 0x7fd9302fe000,0x00a00000 [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, I915_GEM_SET_DOMAIN [drm:drm_ioctl], pid=699, dev=0xe200, auth=1, DRM_IOCTL_MODE_RMFB [drm:intel_crtc_set_config], [CRTC:3] [NOFB] v2: drm_ioctls is now a constant (Ville Syrjälä) Signed-off-by: Chris Cummins Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_drv.c | 20 +++++++++++++------- include/drm/drmP.h | 3 ++- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8d4f29075af5..9cc247f55502 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -57,7 +57,7 @@ static int drm_version(struct drm_device *dev, void *data, struct drm_file *file_priv); #define DRM_IOCTL_DEF(ioctl, _func, _flags) \ - [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0} + [DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { @@ -375,7 +375,7 @@ long drm_ioctl(struct file *filp, { struct drm_file *file_priv = filp->private_data; struct drm_device *dev; - const struct drm_ioctl_desc *ioctl; + const struct drm_ioctl_desc *ioctl = NULL; drm_ioctl_t *func; unsigned int nr = DRM_IOCTL_NR(cmd); int retcode = -EINVAL; @@ -392,11 +392,6 @@ long drm_ioctl(struct file *filp, atomic_inc(&dev->counts[_DRM_STAT_IOCTLS]); ++file_priv->ioctl_count; - DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n", - task_pid_nr(current), cmd, nr, - (long)old_encode_dev(file_priv->minor->device), - file_priv->authenticated); - if ((nr >= DRM_CORE_IOCTL_COUNT) && ((nr < DRM_COMMAND_BASE) || (nr >= DRM_COMMAND_END))) goto err_i1; @@ -417,6 +412,11 @@ long drm_ioctl(struct file *filp, } else goto err_i1; + DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n", + task_pid_nr(current), + (long)old_encode_dev(file_priv->minor->device), + file_priv->authenticated, ioctl->name); + /* Do not trust userspace, use our own definition */ func = ioctl->func; /* is there a local override? */ @@ -471,6 +471,12 @@ long drm_ioctl(struct file *filp, } err_i1: + if (!ioctl) + DRM_DEBUG("invalid iotcl: pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), + (long)old_encode_dev(file_priv->minor->device), + file_priv->authenticated, cmd, nr); + if (kdata != stack_kdata) kfree(kdata); atomic_dec(&dev->ioctl_count); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index f1ce786736e4..b092b5bcf227 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -316,6 +316,7 @@ struct drm_ioctl_desc { int flags; drm_ioctl_t *func; unsigned int cmd_drv; + const char *name; }; /** @@ -324,7 +325,7 @@ struct drm_ioctl_desc { */ #define DRM_IOCTL_DEF_DRV(ioctl, _func, _flags) \ - [DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl} + [DRM_IOCTL_NR(DRM_##ioctl)] = {.cmd = DRM_##ioctl, .func = _func, .flags = _flags, .cmd_drv = DRM_IOCTL_##ioctl, .name = #ioctl} struct drm_magic_entry { struct list_head head; -- cgit v1.2.3 From 7677fc965fba41d1386fa3b76a1f00303f02bb2d Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Wed, 8 May 2013 22:22:35 +0000 Subject: net/mlx4: Strengthen VLAN tags/priorities enforcement in VST mode Make sure that the following steps are taken: - drop packets sent by the VF with vlan tag - block packets with vlan tag which are steered to the VF - drop/block tagged packets when the policy is priority-tagged - make sure VLAN stripping for received packets is set - make sure force UP bit for the VF QP is set Use enum values for all the above instead of numerical bit offsets. Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2 +- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 20 ++++++++++++--- include/linux/mlx4/qp.h | 29 ++++++++++++++++++++-- 3 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 91f2b2c43c12..d3f508697a3d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -60,7 +60,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6; if (user_prio >= 0) { context->pri_path.sched_queue |= user_prio << 3; - context->pri_path.feup = 1 << 6; + context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP; } context->pri_path.counter_index = 0xff; context->cqn_send = cpu_to_be32(cqn); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d4a9de666fbd..1157f028a90f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -372,14 +372,28 @@ static int update_vport_qp_param(struct mlx4_dev *dev, if (MLX4_QP_ST_RC == qp_type) return -EINVAL; + /* force strip vlan by clear vsd */ + qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); + if (0 != vp_oper->state.default_vlan) { + qpc->pri_path.vlan_control = + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* priority tagged */ + qpc->pri_path.vlan_control = + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + } + + qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl = (1 << 6) | (1 << 2); /* set cv bit and hide_cqe_vlan bit*/ - qpc->pri_path.feup |= 1 << 3; /* set fvl bit */ + qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; } if (vp_oper->state.spoofchk) { - qpc->pri_path.feup |= 1 << 5; /* set fsm bit */; + qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; } return 0; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 67f46ad6920a..352eec9df1b8 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -126,7 +126,7 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 reserved1[1]; + u8 vlan_control; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; @@ -141,11 +141,32 @@ struct mlx4_qp_path { u8 sched_queue; u8 vlan_index; u8 feup; - u8 reserved3; + u8 fvl_rx; u8 reserved4[2]; u8 dmac[6]; }; +enum { /* fl */ + MLX4_FL_CV = 1 << 6, + MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 +}; +enum { /* vlan_control */ + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED = 1 << 2, + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED = 1 << 1, /* 802.1p priority tag */ + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED = 1 << 0 +}; + +enum { /* feup */ + MLX4_FEUP_FORCE_ETH_UP = 1 << 6, /* force Eth UP */ + MLX4_FSM_FORCE_ETH_SRC_MAC = 1 << 5, /* force Source MAC */ + MLX4_FVL_FORCE_ETH_VLAN = 1 << 3 /* force Eth vlan */ +}; + +enum { /* fvl_rx */ + MLX4_FVL_RX_FORCE_ETH_VLAN = 1 << 0 /* enforce Eth rx vlan */ +}; + struct mlx4_qp_context { __be32 flags; __be32 pd; @@ -185,6 +206,10 @@ struct mlx4_qp_context { u32 reserved5[10]; }; +enum { /* param3 */ + MLX4_STRIP_VLAN = 1 << 30 +}; + /* Which firmware version adds support for NEC (NoErrorCompletion) bit */ #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232) -- cgit v1.2.3 From f77d602124d865c38705df7fa25c03de9c284ad2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 May 2013 10:28:16 +0000 Subject: ipv6: do not clear pinet6 field We have seen multiple NULL dereferences in __inet6_lookup_established() After analysis, I found that inet6_sk() could be NULL while the check for sk_family == AF_INET6 was true. Bug was added in linux-2.6.29 when RCU lookups were introduced in UDP and TCP stacks. Once an IPv6 socket, using SLAB_DESTROY_BY_RCU is inserted in a hash table, we no longer can clear pinet6 field. This patch extends logic used in commit fcbdf09d9652c891 ("net: fix nulls list corruptions in sk_prot_alloc") TCP/UDP/UDPLite IPv6 protocols provide their own .clear_sk() method to make sure we do not clear pinet6 field. At socket clone phase, we do not really care, as cloning the parent (non NULL) pinet6 is not adding a fatal race. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++++++++ net/core/sock.c | 12 ------------ net/ipv6/tcp_ipv6.c | 12 ++++++++++++ net/ipv6/udp.c | 13 ++++++++++++- net/ipv6/udp_impl.h | 2 ++ net/ipv6/udplite.c | 2 +- 6 files changed, 39 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 5c97b0fc5623..66772cf8c3c5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -866,6 +866,18 @@ struct inet_hashinfo; struct raw_hashinfo; struct module; +/* + * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * un-modified. Special care is taken when initializing object to zero. + */ +static inline void sk_prot_clear_nulls(struct sock *sk, int size) +{ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + size - offsetof(struct sock, sk_node.pprev)); +} + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto diff --git a/net/core/sock.c b/net/core/sock.c index d4f4cea726e7..6ba327da79e1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1217,18 +1217,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -/* - * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes - * un-modified. Special care is taken when initializing object to zero. - */ -static inline void sk_prot_clear_nulls(struct sock *sk, int size) -{ - if (offsetof(struct sock, sk_node.next) != 0) - memset(sk, 0, offsetof(struct sock, sk_node.next)); - memset(&sk->sk_node.pprev, 0, - size - offsetof(struct sock, sk_node.pprev)); -} - void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) { unsigned long nulls1, nulls2; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 71167069b394..0a17ed9eaf39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,17 @@ void tcp6_proc_exit(struct net *net) } #endif +static void tcp_v6_clear_sk(struct sock *sk, int size) +{ + struct inet_sock *inet = inet_sk(sk); + + /* we do not want to clear pinet6 field, because of RCU lookups */ + sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6)); + + size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); + memset(&inet->pinet6 + 1, 0, size); +} + struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -1933,6 +1944,7 @@ struct proto tcpv6_prot = { #ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif + .clear_sk = tcp_v6_clear_sk, }; static const struct inet6_protocol tcpv6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d4defdd44937..42923b14dfa6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1432,6 +1432,17 @@ void udp6_proc_exit(struct net *net) { } #endif /* CONFIG_PROC_FS */ +void udp_v6_clear_sk(struct sock *sk, int size) +{ + struct inet_sock *inet = inet_sk(sk); + + /* we do not want to clear pinet6 field, because of RCU lookups */ + sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6)); + + size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); + memset(&inet->pinet6 + 1, 0, size); +} + /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { @@ -1462,7 +1473,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, + .clear_sk = udp_v6_clear_sk, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index d7571046bfc4..4691ed50a928 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -31,6 +31,8 @@ extern int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, extern int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); extern void udpv6_destroy_sock(struct sock *sk); +extern void udp_v6_clear_sk(struct sock *sk, int size); + #ifdef CONFIG_PROC_FS extern int udp6_seq_show(struct seq_file *seq, void *v); #endif diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 1d08e21d9f69..dfcc4be46898 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -56,7 +56,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, + .clear_sk = udp_v6_clear_sk, }; static struct inet_protosw udplite6_protosw = { -- cgit v1.2.3 From e2555fde4159467fb579e6ae3c0a8fc33015d0f5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 May 2013 09:45:01 -0400 Subject: jbd,jbd2: fix oops in jbd2_journal_put_journal_head() Commit ae4647fb (jbd2: reduce journal_head size) introduced a regression where we occasionally hit panic in jbd2_journal_put_journal_head() because of wrong b_jcount. The bug is caused by gcc making 64-bit access to 32-bit bitfield and thus clobbering b_jcount. At least for now, those 8 bytes saved in struct journal_head are not worth the trouble with gcc bitfield handling so revert that part of the patch. Reported-by: EUNBONG SONG Reported-by: Tony Luck Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/journal-head.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index 13a3da25ff07..98cd41bb39c8 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h @@ -30,15 +30,19 @@ struct journal_head { /* * Journalling list for this buffer [jbd_lock_bh_state()] + * NOTE: We *cannot* combine this with b_modified into a bitfield + * as gcc would then (which the C standard allows but which is + * very unuseful) make 64-bit accesses to the bitfield and clobber + * b_jcount if its update races with bitfield modification. */ - unsigned b_jlist:4; + unsigned b_jlist; /* * This flag signals the buffer has been modified by * the currently running transaction * [jbd_lock_bh_state()] */ - unsigned b_modified:1; + unsigned b_modified; /* * Copy of the buffer data frozen for writing to the log. -- cgit v1.2.3 From b4f711ee03d28f776fd2324fd0bd999cc428e4d2 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 24 Apr 2013 11:32:56 -0700 Subject: time: Revert ALWAYS_USE_PERSISTENT_CLOCK compile time optimizaitons Kay Sievers noted that the ALWAYS_USE_PERSISTENT_CLOCK config, which enables some minor compile time optimization to avoid uncessary code in mostly the suspend/resume path could cause problems for userland. In particular, the dependency for RTC_HCTOSYS on !ALWAYS_USE_PERSISTENT_CLOCK, which avoids setting the time twice and simplifies suspend/resume, has the side effect of causing the /sys/class/rtc/rtcN/hctosys flag to always be zero, and this flag is commonly used by udev to setup the /dev/rtc symlink to /dev/rtcN, which can cause pain for older applications. While the udev rules could use some work to be less fragile, breaking userland should strongly be avoided. Additionally the compile time optimizations are fairly minor, and the code being optimized is likely to be reworked in the future, so lets revert this change. Reported-by: Kay Sievers Signed-off-by: John Stultz Cc: stable #3.9 Cc: Feng Tang Cc: Jason Gunthorpe Link: http://lkml.kernel.org/r/1366828376-18124-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 - drivers/rtc/Kconfig | 2 -- include/linux/time.h | 4 ---- kernel/time/Kconfig | 5 ----- 4 files changed, 12 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5db2117ae288..45c41249321f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,7 +108,6 @@ config X86 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL if X86_64 select KTIME_SCALAR if X86_32 - select ALWAYS_USE_PERSISTENT_CLOCK select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 0c81915b1997..b9838130a7b0 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -20,7 +20,6 @@ if RTC_CLASS config RTC_HCTOSYS bool "Set system time from RTC on startup and resume" default y - depends on !ALWAYS_USE_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be set using the value read from a specified RTC device. This is useful to avoid @@ -29,7 +28,6 @@ config RTC_HCTOSYS config RTC_SYSTOHC bool "Set the RTC time based on NTP synchronization" default y - depends on !ALWAYS_USE_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be stored in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11 diff --git a/include/linux/time.h b/include/linux/time.h index 22d81b3c955b..d5d229b2e5af 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -117,14 +117,10 @@ static inline bool timespec_valid_strict(const struct timespec *ts) extern bool persistent_clock_exist; -#ifdef ALWAYS_USE_PERSISTENT_CLOCK -#define has_persistent_clock() true -#else static inline bool has_persistent_clock(void) { return persistent_clock_exist; } -#endif extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd7..b69692250af4 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -12,11 +12,6 @@ config CLOCKSOURCE_WATCHDOG config ARCH_CLOCKSOURCE_DATA bool -# Platforms has a persistent clock -config ALWAYS_USE_PERSISTENT_CLOCK - bool - default n - # Timekeeping vsyscall support config GENERIC_TIME_VSYSCALL bool -- cgit v1.2.3 From fc1f7d5606487ae28d6c84e95401952927d7379e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:43 +0000 Subject: clocksource: apb_timer: Remove unsused function Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Magnus Damm Acked-by: Jamie Iles Link: http://lkml.kernel.org/r/20130425143435.558006195@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/clocksource/dw_apb_timer.c | 12 ------------ include/linux/dw_apb_timer.h | 1 - 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c index 8c2a35f26d9b..e54ca1062d8e 100644 --- a/drivers/clocksource/dw_apb_timer.c +++ b/drivers/clocksource/dw_apb_timer.c @@ -387,15 +387,3 @@ cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs) { return (cycle_t)~apbt_readl(&dw_cs->timer, APBTMR_N_CURRENT_VALUE); } - -/** - * dw_apb_clocksource_unregister() - unregister and free a clocksource. - * - * @dw_cs: The clocksource to unregister/free. - */ -void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs) -{ - clocksource_unregister(&dw_cs->cs); - - kfree(dw_cs); -} diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index dd755ce2a5eb..b1cd9597c241 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -51,7 +51,6 @@ dw_apb_clocksource_init(unsigned rating, const char *name, void __iomem *base, void dw_apb_clocksource_register(struct dw_apb_clocksource *dw_cs); void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs); cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs); -void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs); extern void dw_apb_timer_init(void); #endif /* __DW_APB_TIMER_H__ */ -- cgit v1.2.3 From ba919d1caa2e624eb8c6cae1f2ce0a253e697d45 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:44 +0000 Subject: clocksource: Let timekeeping_notify return success/error timekeeping_notify() can fail due cs->enable() failure. Though the caller does not notice and happily keeps the wrong clocksource as the current one. Let the caller know about failure, so the current clocksource will be shown correctly in sysfs. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143435.696321912@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 +- kernel/time/clocksource.c | 6 +++--- kernel/time/timekeeping.c | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 7279b94c01da..aa6ba44e75d5 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -321,7 +321,7 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) } -extern void timekeeping_notify(struct clocksource *clock); +extern int timekeeping_notify(struct clocksource *clock); extern cycle_t clocksource_mmio_readl_up(struct clocksource *); extern cycle_t clocksource_mmio_readl_down(struct clocksource *); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index dda5c7130d93..1923a340bd91 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -611,10 +611,10 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) { - printk(KERN_INFO "Switching to clocksource %s\n", best->name); + + if (curr_clocksource != best && !timekeeping_notify(best)) { + pr_info("Switched to clocksource %s\n", best->name); curr_clocksource = best; - timekeeping_notify(curr_clocksource); } } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 98cd470bbe49..da6e10c7a378 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -648,14 +648,15 @@ static int change_clocksource(void *data) * This function is called from clocksource.c after a new, better clock * source has been registered. The caller holds the clocksource_mutex. */ -void timekeeping_notify(struct clocksource *clock) +int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &timekeeper; if (tk->clock == clock) - return; + return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); + return tk->clock == clock ? 0 : -1; } /** -- cgit v1.2.3 From 09ac369c825d9d593404306d59062d854b321e9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:44 +0000 Subject: clocksource: Add module refcount Add a module refcount, so the current clocksource cannot be removed unconditionally. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143435.762417789@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 3 +++ kernel/time/timekeeping.c | 19 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index aa6ba44e75d5..32a895b114d8 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -21,6 +21,7 @@ /* clocksource cycle base type */ typedef u64 cycle_t; struct clocksource; +struct module; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA #include @@ -162,6 +163,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary * @cycle_last: most recent cycle counter value seen by ::read() + * @owner: module reference, must be set by clocksource in modules */ struct clocksource { /* @@ -195,6 +197,7 @@ struct clocksource { cycle_t cs_last; cycle_t wd_last; #endif + struct module *owner; } ____cacheline_aligned; /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index da6e10c7a378..933efa4071c3 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -627,11 +627,20 @@ static int change_clocksource(void *data) write_seqcount_begin(&timekeeper_seq); timekeeping_forward_now(tk); - if (!new->enable || new->enable(new) == 0) { - old = tk->clock; - tk_setup_internals(tk, new); - if (old->disable) - old->disable(old); + /* + * If the cs is in module, get a module reference. Succeeds + * for built-in code (owner == NULL) as well. + */ + if (try_module_get(new->owner)) { + if (!new->enable || new->enable(new) == 0) { + old = tk->clock; + tk_setup_internals(tk, new); + if (old->disable) + old->disable(old); + module_put(old->owner); + } else { + module_put(new->owner); + } } timekeeping_update(tk, true, true); -- cgit v1.2.3 From a89c7edbe7d7aa80f507915f3dd801211b116b79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:46 +0000 Subject: clocksource: Let clocksource_unregister() return success/error The unregister call can fail, if the clocksource is the current one and there is no replacement clocksource available. It can also fail, if the clocksource is the watchdog clocksource and I'm not going to provide support for this. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.029915527@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 +- kernel/time/clocksource.c | 33 ++++++++++++--------------------- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 32a895b114d8..2f39a4911668 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -282,7 +282,7 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) extern int clocksource_register(struct clocksource*); -extern void clocksource_unregister(struct clocksource*); +extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 791d1aeb17ac..31b90332f47b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -389,28 +389,17 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs) static void clocksource_dequeue_watchdog(struct clocksource *cs) { - struct clocksource *tmp; unsigned long flags; spin_lock_irqsave(&watchdog_lock, flags); - if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { - /* cs is a watched clocksource. */ - list_del_init(&cs->wd_list); - } else if (cs == watchdog) { - /* Reset watchdog cycles */ - clocksource_reset_watchdog(); - /* Current watchdog is removed. Find an alternative. */ - watchdog = NULL; - list_for_each_entry(tmp, &clocksource_list, list) { - if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY) - continue; - if (!watchdog || tmp->rating > watchdog->rating) - watchdog = tmp; + if (cs != watchdog) { + if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { + /* cs is a watched clocksource. */ + list_del_init(&cs->wd_list); + /* Check if the watchdog timer needs to be stopped. */ + clocksource_stop_watchdog(); } } - cs->flags &= ~CLOCK_SOURCE_WATCHDOG; - /* Check if the watchdog timer needs to be stopped. */ - clocksource_stop_watchdog(); spin_unlock_irqrestore(&watchdog_lock, flags); } @@ -841,13 +830,15 @@ static int clocksource_unbind(struct clocksource *cs) * clocksource_unregister - remove a registered clocksource * @cs: clocksource to be unregistered */ -void clocksource_unregister(struct clocksource *cs) +int clocksource_unregister(struct clocksource *cs) { + int ret = 0; + mutex_lock(&clocksource_mutex); - clocksource_dequeue_watchdog(cs); - list_del(&cs->list); - clocksource_select(); + if (!list_empty(&cs->list)) + ret = clocksource_unbind(cs); mutex_unlock(&clocksource_mutex); + return ret; } EXPORT_SYMBOL(clocksource_unregister); -- cgit v1.2.3 From 7172a286ced0c1f4f239a0fa09db54ed37d3ead2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:47 +0000 Subject: clockevents: Get rid of the notifier chain 7+ years and still a single user. Kill it. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.098520211@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 - kernel/time/clockevents.c | 35 +++-------------------------------- kernel/time/tick-broadcast.c | 5 ++--- kernel/time/tick-common.c | 30 +++++------------------------- kernel/time/tick-internal.h | 7 ++++--- 5 files changed, 14 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 963d71431388..2f498f66c1bb 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -150,7 +150,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); -extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c6d6400ee137..dd70b4842c62 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "tick-internal.h" @@ -23,10 +22,6 @@ /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); - -/* Notification for clock events */ -static RAW_NOTIFIER_HEAD(clockevents_chain); - /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); @@ -232,30 +227,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return (rc && force) ? clockevents_program_min_delta(dev) : rc; } -/** - * clockevents_register_notifier - register a clock events change listener - */ -int clockevents_register_notifier(struct notifier_block *nb) -{ - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&clockevents_lock, flags); - ret = raw_notifier_chain_register(&clockevents_chain, nb); - raw_spin_unlock_irqrestore(&clockevents_lock, flags); - - return ret; -} - -/* - * Notify about a clock event change. Called with clockevents_lock - * held. - */ -static void clockevents_do_notify(unsigned long reason, void *dev) -{ - raw_notifier_call_chain(&clockevents_chain, reason, dev); -} - /* * Called after a notify add to make devices available which were * released from the notifier call. @@ -269,7 +240,7 @@ static void clockevents_notify_released(void) struct clock_event_device, list); list_del(&dev->list); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); } } @@ -290,7 +261,7 @@ void clockevents_register_device(struct clock_event_device *dev) raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags); @@ -433,7 +404,7 @@ void clockevents_notify(unsigned long reason, void *arg) int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); - clockevents_do_notify(reason, arg); + tick_notify(reason, arg); switch (reason) { case CLOCK_EVT_NOTIFY_CPU_DEAD: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 24938d577669..3500caaa0bfd 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -64,7 +64,7 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) /* * Check, if the device can be utilized as broadcast device: */ -int tick_check_broadcast_device(struct clock_event_device *dev) +void tick_install_broadcast_device(struct clock_event_device *dev) { struct clock_event_device *cur = tick_broadcast_device.evtdev; @@ -72,7 +72,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev) (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + return; clockevents_exchange_device(tick_broadcast_device.evtdev, dev); if (cur) @@ -90,7 +90,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) */ if (dev->features & CLOCK_EVT_FEAT_ONESHOT) tick_clock_notify(); - return 1; } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..dbf4e18d5101 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -208,11 +208,11 @@ static void tick_setup_device(struct tick_device *td, /* * Check, if the new registered device should be used. */ -static int tick_check_new_device(struct clock_event_device *newdev) +void tick_check_new_device(struct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; - int cpu, ret = NOTIFY_OK; + int cpu; unsigned long flags; raw_spin_lock_irqsave(&tick_device_lock, flags); @@ -275,18 +275,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) tick_oneshot_notify(); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - return NOTIFY_STOP; + return; out_bc: /* * Can the new device be used as a broadcast device ? */ - if (tick_check_broadcast_device(newdev)) - ret = NOTIFY_STOP; - + tick_install_broadcast_device(newdev); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - - return ret; } /* @@ -360,17 +356,10 @@ static void tick_resume(void) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -/* - * Notification about clock event devices - */ -static int tick_notify(struct notifier_block *nb, unsigned long reason, - void *dev) +void tick_notify(unsigned long reason, void *dev) { switch (reason) { - case CLOCK_EVT_NOTIFY_ADD: - return tick_check_new_device(dev); - case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_OFF: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: @@ -404,21 +393,12 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, default: break; } - - return NOTIFY_OK; } -static struct notifier_block tick_notifier = { - .notifier_call = tick_notify, -}; - /** * tick_init - initialize the tick control - * - * Register the notifier with the clockevents framework */ void __init tick_init(void) { - clockevents_register_notifier(&tick_notifier); tick_broadcast_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae4602..60742fe6f63d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -18,6 +18,8 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); +extern void tick_notify(unsigned long reason, void *dev); +extern void tick_check_new_device(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); @@ -90,7 +92,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); -extern int tick_check_broadcast_device(struct clock_event_device *dev); +extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); @@ -102,9 +104,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); #else /* !BROADCAST */ -static inline int tick_check_broadcast_device(struct clock_event_device *dev) +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { - return 0; } static inline int tick_is_broadcast_device(struct clock_event_device *dev) -- cgit v1.2.3 From ccf33d6880f39a35158fff66db13000ae4943fac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:49 +0000 Subject: clockevents: Add module refcount We want to be able to remove clockevent modules as well. Add a refcount so we don't remove a module with an active clock event device. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.307435149@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 3 +++ kernel/time/clockevents.c | 1 + kernel/time/tick-broadcast.c | 3 +++ kernel/time/tick-common.c | 4 ++++ 4 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 2f498f66c1bb..ae1193bcf074 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -30,6 +30,7 @@ enum clock_event_nofitiers { #include struct clock_event_device; +struct module; /* Clock event mode commands */ enum clock_event_mode { @@ -83,6 +84,7 @@ enum clock_event_mode { * @irq: IRQ number (only for non CPU local devices) * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code + * @owner: module reference */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); @@ -112,6 +114,7 @@ struct clock_event_device { int irq; const struct cpumask *cpumask; struct list_head list; + struct module *owner; } ____cacheline_aligned; /* diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0e3a8448e115..89e394caa769 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -357,6 +357,7 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. */ if (old) { + module_put(old->owner); clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 3500caaa0bfd..0e374cd2e0ef 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "tick-internal.h" @@ -73,6 +74,8 @@ void tick_install_broadcast_device(struct clock_event_device *dev) tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) return; + if (!try_module_get(dev->owner)) + return; clockevents_exchange_device(tick_broadcast_device.evtdev, dev); if (cur) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 84c7cfca4d7d..433a1e11d13b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -257,6 +258,9 @@ void tick_check_new_device(struct clock_event_device *newdev) goto out_bc; } + if (!try_module_get(newdev->owner)) + return; + /* * Replace the eventually existing device by the new * device. If the current device is the broadcast device, do -- cgit v1.2.3 From 03e13cf5ee60584fe0c831682c67212effb7fca4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:50 +0000 Subject: clockevents: Implement unbind functionality Provide a sysfs interface to allow unbinding of clockevent devices. The device is unbound if it is unused or if there is a replacement device available. Unbinding of broadcast devices is not supported as we don't want to foster that nonsense. If no replacement device is available the unbind returns -EBUSY. Unbind is available from the kernel and through sysfs, which is necessary to drop the module refcount. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.499216659@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + kernel/time/clockevents.c | 125 ++++++++++++++++++++++++++++++++++++++++++++ kernel/time/clocksource.c | 9 ++-- kernel/time/tick-common.c | 24 +++++++++ kernel/time/tick-internal.h | 7 +++ 5 files changed, 162 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index ae1193bcf074..0857922e8ad0 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -141,6 +141,7 @@ static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0a23f4f29934..38959c866789 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -25,6 +25,13 @@ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); +/* Protection for unbind operations */ +static DEFINE_MUTEX(clockevents_mutex); + +struct ce_unbind { + struct clock_event_device *ce; + int res; +}; /** * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds @@ -245,6 +252,90 @@ static void clockevents_notify_released(void) } } +/* + * Try to install a replacement clock event device + */ +static int clockevents_replace(struct clock_event_device *ced) +{ + struct clock_event_device *dev, *newdev = NULL; + + list_for_each_entry(dev, &clockevent_devices, list) { + if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + continue; + + if (!tick_check_replacement(newdev, dev)) + continue; + + if (!try_module_get(dev->owner)) + continue; + + if (newdev) + module_put(newdev->owner); + newdev = dev; + } + if (newdev) { + tick_install_replacement(newdev); + list_del_init(&ced->list); + } + return newdev ? 0 : -EBUSY; +} + +/* + * Called with clockevents_mutex and clockevents_lock held + */ +static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) +{ + /* Fast track. Device is unused */ + if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + list_del_init(&ced->list); + return 0; + } + + return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY; +} + +/* + * SMP function call to unbind a device + */ +static void __clockevents_unbind(void *arg) +{ + struct ce_unbind *cu = arg; + int res; + + raw_spin_lock(&clockevents_lock); + res = __clockevents_try_unbind(cu->ce, smp_processor_id()); + if (res == -EAGAIN) + res = clockevents_replace(cu->ce); + cu->res = res; + raw_spin_unlock(&clockevents_lock); +} + +/* + * Issues smp function call to unbind a per cpu device. Called with + * clockevents_mutex held. + */ +static int clockevents_unbind(struct clock_event_device *ced, int cpu) +{ + struct ce_unbind cu = { .ce = ced, .res = -ENODEV }; + + smp_call_function_single(cpu, __clockevents_unbind, &cu, 1); + return cu.res; +} + +/* + * Unbind a clockevents device. + */ +int clockevents_unbind_device(struct clock_event_device *ced, int cpu) +{ + int ret; + + mutex_lock(&clockevents_mutex); + ret = clockevents_unbind(ced, cpu); + mutex_unlock(&clockevents_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(clockevents_unbind); + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -487,6 +578,38 @@ static ssize_t sysfs_show_current_tick_dev(struct device *dev, } static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL); +/* We don't support the abomination of removable broadcast devices */ +static ssize_t sysfs_unbind_tick_dev(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char name[CS_NAME_LEN]; + size_t ret = sysfs_get_uname(buf, name, count); + struct clock_event_device *ce; + + if (ret < 0) + return ret; + + ret = -ENODEV; + mutex_lock(&clockevents_mutex); + raw_spin_lock_irq(&clockevents_lock); + list_for_each_entry(ce, &clockevent_devices, list) { + if (!strcmp(ce->name, name)) { + ret = __clockevents_try_unbind(ce, dev->id); + break; + } + } + raw_spin_unlock_irq(&clockevents_lock); + /* + * We hold clockevents_mutex, so ce can't go away + */ + if (ret == -EAGAIN) + ret = clockevents_unbind(ce, dev->id); + mutex_unlock(&clockevents_mutex); + return ret ? ret : count; +} +static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev); + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static struct device tick_bc_dev = { .init_name = "broadcast", @@ -529,6 +652,8 @@ static int __init tick_init_sysfs(void) err = device_register(dev); if (!err) err = device_create_file(dev, &dev_attr_current_device); + if (!err) + err = device_create_file(dev, &dev_attr_unbind_device); if (err) return err; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 31b90332f47b..6d05b00410cc 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,6 +31,8 @@ #include #include +#include "tick-internal.h" + void timecounter_init(struct timecounter *tc, const struct cyclecounter *cc, u64 start_tstamp) @@ -174,7 +176,6 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) static struct clocksource *curr_clocksource; static LIST_HEAD(clocksource_list); static DEFINE_MUTEX(clocksource_mutex); -#define CS_NAME_LEN 32 static char override_name[CS_NAME_LEN]; static int finished_booting; @@ -864,7 +865,7 @@ sysfs_show_current_clocksources(struct device *dev, return count; } -static size_t clocksource_get_uname(const char *buf, char *dst, size_t cnt) +size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) { size_t ret = cnt; @@ -899,7 +900,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev, mutex_lock(&clocksource_mutex); - ret = clocksource_get_uname(buf, override_name, count); + ret = sysfs_get_uname(buf, override_name, count); if (ret >= 0) clocksource_select(); @@ -925,7 +926,7 @@ static ssize_t sysfs_unbind_clocksource(struct device *dev, char name[CS_NAME_LEN]; size_t ret; - ret = clocksource_get_uname(buf, name, count); + ret = sysfs_get_uname(buf, name, count); if (ret < 0) return ret; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index c34021650348..5edfb4806032 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -205,6 +205,17 @@ static void tick_setup_device(struct tick_device *td, tick_setup_oneshot(newdev, handler, next_event); } +void tick_install_replacement(struct clock_event_device *newdev) +{ + struct tick_device *td = &__get_cpu_var(tick_cpu_device); + int cpu = smp_processor_id(); + + clockevents_exchange_device(td->evtdev, newdev); + tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); + if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) + tick_oneshot_notify(); +} + static bool tick_check_percpu(struct clock_event_device *curdev, struct clock_event_device *newdev, int cpu) { @@ -236,6 +247,19 @@ static bool tick_check_preferred(struct clock_event_device *curdev, return !curdev || newdev->rating > curdev->rating; } +/* + * Check whether the new device is a better fit than curdev. curdev + * can be NULL ! + */ +bool tick_check_replacement(struct clock_event_device *curdev, + struct clock_event_device *newdev) +{ + if (tick_check_percpu(curdev, newdev, smp_processor_id())) + return false; + + return tick_check_preferred(curdev, newdev); +} + /* * Check, if the new registered device should be used. Called with * clockevents_lock held and interrupts disabled. diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 06bfc8802dfb..be1690eaecff 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -11,6 +11,8 @@ extern seqlock_t jiffies_lock; #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 +#define CS_NAME_LEN 32 + DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern ktime_t tick_next_period; extern ktime_t tick_period; @@ -23,9 +25,14 @@ extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); extern void tick_resume(void); +extern bool tick_check_replacement(struct clock_event_device *curdev, + struct clock_event_device *newdev); +extern void tick_install_replacement(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); +extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); + /* * NO_HZ / high resolution timer shared code */ -- cgit v1.2.3 From a44b8bd6072ac82fc49af941de8c1ac85d94852d Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 7 May 2013 16:38:40 +0800 Subject: ktime: Use macro NSEC_PER_USEC where appropriate We've got the macro NSEC_PER_USEC defined in header file include/linux/time.h. To make the code decent, this patch replaces the immediate number 1000 to convert bewteen a time value in microseconds and one in nanoseconds with the macro NSEC_PER_USEC. Signed-off-by: Liu Ying Cc: David S. Miller Cc: Daniel Borkmann Cc: Thomas Gleixner Cc: John Stultz Signed-off-by: John Stultz --- include/linux/ktime.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index bbca12804d12..608728a164a9 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -229,7 +229,8 @@ static inline ktime_t timespec_to_ktime(const struct timespec ts) static inline ktime_t timeval_to_ktime(const struct timeval tv) { return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, - .nsec = (s32)tv.tv_usec * 1000 } }; + .nsec = (s32)(tv.tv_usec * + NSEC_PER_USEC) } }; } /** @@ -320,12 +321,12 @@ static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) { - return ktime_add_ns(kt, usec * 1000); + return ktime_add_ns(kt, usec * NSEC_PER_USEC); } static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) { - return ktime_sub_ns(kt, usec * 1000); + return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); -- cgit v1.2.3 From 35b210855069ae34450eeaafc0f1b5bfa4f71c87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 May 2013 15:47:49 +0200 Subject: ktime: Add __must_check prefix to ktime_to_timespec_cond The function is currently mainly used in the networking code and if others start using it, they must check the result, otherwise it cannot be determined if the timespec conversion suceeded. Currently no user lacks this check, but make future users aware of a possible misusage. Signed-off-by: Daniel Borkmann Signed-off-by: John Stultz --- include/linux/ktime.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 608728a164a9..fc66b301b648 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -339,7 +339,8 @@ extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); * * Returns true if there was a successful conversion, false if kt was 0. */ -static inline bool ktime_to_timespec_cond(const ktime_t kt, struct timespec *ts) +static inline __must_check bool ktime_to_timespec_cond(const ktime_t kt, + struct timespec *ts) { if (kt.tv64) { *ts = ktime_to_timespec(kt); -- cgit v1.2.3 From 55a68c23e0a675b2b8ac2656fd6edbf98b78e4c6 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 7 May 2013 22:11:26 +0200 Subject: dw_apb_timer_of.c: Remove parts that were picoxcell-specific It seems we made a mistake when creating dw_apb_timer_of.c: picoxcell sched_clock had parts that were not related to dw_apb_timer, yet we moved them to dw_apb_timer_of, and tried to use them on socfpga. This results in system where user/system time is not measured properly, as demonstrated by time dd if=/dev/urandom of=/dev/zero bs=100000 count=100 So this patch switches sched_clock to hardware that exists on both platforms, and adds missing of_node_put() in dw_apb_timer_init(). Signed-off-by: Pavel Machek Acked-by: Jamie Iles Signed-off-by: John Stultz --- arch/arm/mach-picoxcell/common.h | 2 -- drivers/clocksource/dw_apb_timer.c | 6 ---- drivers/clocksource/dw_apb_timer_of.c | 52 ++++++++++++++++------------------- include/linux/dw_apb_timer.h | 6 ++++ 4 files changed, 29 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-picoxcell/common.h b/arch/arm/mach-picoxcell/common.h index 481b42a4ef15..237fb3bcbd04 100644 --- a/arch/arm/mach-picoxcell/common.h +++ b/arch/arm/mach-picoxcell/common.h @@ -12,6 +12,4 @@ #include -extern void dw_apb_timer_init(void); - #endif /* __PICOXCELL_COMMON_H__ */ diff --git a/drivers/clocksource/dw_apb_timer.c b/drivers/clocksource/dw_apb_timer.c index e54ca1062d8e..e7042bc5c7d2 100644 --- a/drivers/clocksource/dw_apb_timer.c +++ b/drivers/clocksource/dw_apb_timer.c @@ -21,12 +21,6 @@ #define APBT_MIN_PERIOD 4 #define APBT_MIN_DELTA_USEC 200 -#define APBTMR_N_LOAD_COUNT 0x00 -#define APBTMR_N_CURRENT_VALUE 0x04 -#define APBTMR_N_CONTROL 0x08 -#define APBTMR_N_EOI 0x0c -#define APBTMR_N_INT_STATUS 0x10 - #define APBTMRS_INT_STATUS 0xa0 #define APBTMRS_EOI 0xa4 #define APBTMRS_RAW_INT_STATUS 0xa8 diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index ab09ed3742ee..44a3b9163c46 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -57,6 +57,15 @@ static void add_clockevent(struct device_node *event_timer) dw_apb_clockevent_register(ced); } +static void __iomem *sched_io_base; + +/* This is actually same as __apbt_read_clocksource(), but with + different interface */ +static u32 read_sched_clock_sptimer(void) +{ + return ~__raw_readl(sched_io_base + APBTMR_N_CURRENT_VALUE); +} + static void add_clocksource(struct device_node *source_timer) { void __iomem *iobase; @@ -71,41 +80,27 @@ static void add_clocksource(struct device_node *source_timer) dw_apb_clocksource_start(cs); dw_apb_clocksource_register(cs); -} -static void __iomem *sched_io_base; - -static u32 read_sched_clock(void) -{ - return __raw_readl(sched_io_base); + sched_io_base = iobase; + setup_sched_clock(read_sched_clock_sptimer, 32, rate); } -static const struct of_device_id sptimer_ids[] __initconst = { - { .compatible = "picochip,pc3x2-rtc" }, +static const struct of_device_id osctimer_ids[] __initconst = { + { .compatible = "picochip,pc3x2-timer" }, + { .compatible = "snps,dw-apb-timer-osc" }, { .compatible = "snps,dw-apb-timer-sp" }, - { /* Sentinel */ }, + { /* Sentinel */ }, }; -static void init_sched_clock(void) -{ - struct device_node *sched_timer; - u32 rate; - - sched_timer = of_find_matching_node(NULL, sptimer_ids); - if (!sched_timer) - panic("No RTC for sched clock to use"); +/* + You don't have to use dw_apb_timer for scheduler clock, + this should also work fine on arm: - timer_get_base_and_rate(sched_timer, &sched_io_base, &rate); - of_node_put(sched_timer); + twd_local_timer_of_register(); + arch_timer_of_register(); + arch_timer_sched_clock_init(); +*/ - setup_sched_clock(read_sched_clock, 32, rate); -} - -static const struct of_device_id osctimer_ids[] __initconst = { - { .compatible = "picochip,pc3x2-timer" }, - { .compatible = "snps,dw-apb-timer-osc" }, - {}, -}; void __init dw_apb_timer_init(void) { @@ -121,7 +116,6 @@ void __init dw_apb_timer_init(void) panic("No timer for clocksource"); add_clocksource(source_timer); + of_node_put(event_timer); of_node_put(source_timer); - - init_sched_clock(); } diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index b1cd9597c241..de0904e38f33 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -17,6 +17,12 @@ #include #include +#define APBTMR_N_LOAD_COUNT 0x00 +#define APBTMR_N_CURRENT_VALUE 0x04 +#define APBTMR_N_CONTROL 0x08 +#define APBTMR_N_EOI 0x0c +#define APBTMR_N_INT_STATUS 0x10 + #define APBTMRS_REG_SIZE 0x14 struct dw_apb_timer { -- cgit v1.2.3 From 3565184ed0c1ea46bea5b792da5f72a83c43e49b Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 13 May 2013 18:56:06 +0100 Subject: x86: Increase precision of x86_platform.get/set_wallclock() All the virtualized platforms (KVM, lguest and Xen) have persistent wallclocks that have more than one second of precision. read_persistent_wallclock() and update_persistent_wallclock() allow for nanosecond precision but their implementation on x86 with x86_platform.get/set_wallclock() only allows for one second precision. This means guests may see a wallclock time that is off by up to 1 second. Make set_wallclock() and get_wallclock() take a struct timespec parameter (which allows for nanosecond precision) so KVM and Xen guests may start with a more accurate wallclock time and a Xen dom0 can maintain a more accurate wallclock for guests. Signed-off-by: David Vrabel Signed-off-by: John Stultz --- arch/x86/include/asm/mc146818rtc.h | 4 ++-- arch/x86/include/asm/x86_init.h | 6 ++++-- arch/x86/kernel/kvmclock.c | 9 +++------ arch/x86/kernel/rtc.c | 17 +++++++---------- arch/x86/lguest/boot.c | 4 ++-- arch/x86/platform/efi/efi.c | 10 ++++++---- arch/x86/xen/time.c | 19 ++++++------------- include/linux/efi.h | 4 ++-- 8 files changed, 32 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index d354fb781c57..a55c7efcc4ed 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(unsigned long nowtime); -extern unsigned long mach_get_cmos_time(void); +extern int mach_set_rtc_mmss(const struct timespec *now); +extern void mach_get_cmos_time(struct timespec *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8d99222b36a..828a1565ba57 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,8 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; +struct timespec; + /** * struct x86_platform_ops - platform specific runtime functions * @calibrate_tsc: calibrate TSC @@ -156,8 +158,8 @@ struct x86_cpuinit_ops { */ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); - unsigned long (*get_wallclock)(void); - int (*set_wallclock)(unsigned long nowtime); + void (*get_wallclock)(struct timespec *ts); + int (*set_wallclock)(const struct timespec *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d2c381280e3c..0db81ab511cc 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock; * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -static unsigned long kvm_get_wallclock(void) +static void kvm_get_wallclock(struct timespec *now) { struct pvclock_vcpu_time_info *vcpu_time; - struct timespec ts; int low, high; int cpu; @@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void) cpu = smp_processor_id(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + pvclock_read_wallclock(&wall_clock, vcpu_time, now); preempt_enable(); - - return ts.tv_sec; } -static int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(const struct timespec *now) { return -1; } diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 198eb201ed3b..0aa29394ed6f 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock); * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(unsigned long nowtime) +int mach_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; struct rtc_time tm; int retval = 0; @@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime) return retval; } -unsigned long mach_get_cmos_time(void) +void mach_get_cmos_time(struct timespec *now) { unsigned int status, year, mon, day, hour, min, sec, century = 0; unsigned long flags; @@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void) } else year += CMOS_YEARS_OFFS; - return mktime(year, mon, day, hour, min, sec); + now->tv_sec = mktime(year, mon, day, hour, min, sec); + now->tv_nsec = 0; } /* Routines for accessing the CMOS RAM/RTC. */ @@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write); int update_persistent_clock(struct timespec now) { - return x86_platform.set_wallclock(now.tv_sec); + return x86_platform.set_wallclock(&now); } /* not static: needed by APM */ void read_persistent_clock(struct timespec *ts) { - unsigned long retval; - - retval = x86_platform.get_wallclock(); - - ts->tv_sec = retval; - ts->tv_nsec = 0; + x86_platform.get_wallclock(ts); } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7114c63f047d..8424d5adcfa2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq) * It would be far better for everyone if the Guest had its own clock, but * until then the Host gives us the time on every interrupt. */ -static unsigned long lguest_get_wallclock(void) +static void lguest_get_wallclock(struct timespec *now) { - return lguest_data.time.tv_sec; + *now = lguest_data.time; } /* diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 55856b2310d3..dd3b82530145 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -352,8 +352,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, return status; } -int efi_set_rtc_mmss(unsigned long nowtime) +int efi_set_rtc_mmss(const struct timespec *now) { + unsigned long nowtime = now->tv_sec; efi_status_t status; efi_time_t eft; efi_time_cap_t cap; @@ -388,7 +389,7 @@ int efi_set_rtc_mmss(unsigned long nowtime) return 0; } -unsigned long efi_get_time(void) +void efi_get_time(struct timespec *now) { efi_status_t status; efi_time_t eft; @@ -398,8 +399,9 @@ unsigned long efi_get_time(void) if (status != EFI_SUCCESS) pr_err("Oops: efitime: can't read time!\n"); - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); + now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, + eft.minute, eft.second); + now->tv_nsec = 0; } /* diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 3d88bfdf9e1c..a1947ac2da82 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -191,32 +191,25 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static unsigned long xen_get_wallclock(void) +static void xen_get_wallclock(struct timespec *now) { - struct timespec ts; - - xen_read_wallclock(&ts); - return ts.tv_sec; + xen_read_wallclock(now); } -static int xen_set_wallclock(unsigned long now) +static int xen_set_wallclock(const struct timespec *now) { struct xen_platform_op op; - int rc; /* do nothing for domU */ if (!xen_initial_domain()) return -1; op.cmd = XENPF_settime; - op.u.settime.secs = now; - op.u.settime.nsecs = 0; + op.u.settime.secs = now->tv_sec; + op.u.settime.nsecs = now->tv_nsec; op.u.settime.system_time = xen_clocksource_read(); - rc = HYPERVISOR_dom0_op(&op); - WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now); - - return rc; + return HYPERVISOR_dom0_op(&op); } static struct clocksource xen_clocksource __read_mostly = { diff --git a/include/linux/efi.h b/include/linux/efi.h index 2bc0ad78d058..0068bba6f8b6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -594,8 +594,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern unsigned long efi_get_time(void); -extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_get_time(struct timespec *now); +extern int efi_set_rtc_mmss(const struct timespec *now); extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; -- cgit v1.2.3 From 6cffe00f7d4e24679eae6b7aae4caaf915288256 Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Wed, 15 May 2013 14:38:11 -0700 Subject: alarmtimer: Add functions for timerfd support Add functions needed for hooking up alarmtimer to timerfd: * alarm_restart: Similar to hrtimer_restart, restart an alarmtimer after the expires time has already been updated (as with alarm_forward). * alarm_forward_now: Similar to hrtimer_forward_now, move the expires time forward to an interval from the current time of the associated clock. * alarm_start_relative: Start an alarmtimer with an expires time relative to the current time of the associated clock. * alarm_expires_remaining: Similar to hrtimer_expires_remaining, return the amount of time remaining until alarm expiry. Signed-off-by: Todd Poynor Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 4 ++++ kernel/time/alarmtimer.c | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 9069694e70eb..a899402a5a0e 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -44,10 +44,14 @@ struct alarm { void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); int alarm_start(struct alarm *alarm, ktime_t start); +int alarm_start_relative(struct alarm *alarm, ktime_t start); +void alarm_restart(struct alarm *alarm); int alarm_try_to_cancel(struct alarm *alarm); int alarm_cancel(struct alarm *alarm); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval); +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval); +ktime_t alarm_expires_remaining(const struct alarm *alarm); /* Provide way to access the rtc device being used by alarmtimers */ struct rtc_device *alarmtimer_get_rtcdev(void); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f11d83b12949..3e5cba274475 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -199,6 +199,12 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } +ktime_t alarm_expires_remaining(const struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + return ktime_sub(alarm->node.expires, base->gettime()); +} + #ifdef CONFIG_RTC_CLASS /** * alarmtimer_suspend - Suspend time callback @@ -305,7 +311,7 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, } /** - * alarm_start - Sets an alarm to fire + * alarm_start - Sets an absolute alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm */ @@ -324,6 +330,31 @@ int alarm_start(struct alarm *alarm, ktime_t start) return ret; } +/** + * alarm_start_relative - Sets a relative alarm to fire + * @alarm: ptr to alarm to set + * @start: time relative to now to run the alarm + */ +int alarm_start_relative(struct alarm *alarm, ktime_t start) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + + start = ktime_add(start, base->gettime()); + return alarm_start(alarm, start); +} + +void alarm_restart(struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + hrtimer_set_expires(&alarm->timer, alarm->node.expires); + hrtimer_restart(&alarm->timer); + alarmtimer_enqueue(base, alarm); + spin_unlock_irqrestore(&base->lock, flags); +} + /** * alarm_try_to_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled @@ -394,6 +425,12 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) return overrun; } +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + + return alarm_forward(alarm, base->gettime(), interval); +} -- cgit v1.2.3 From 38ff87f77af0b5a93fc8581cff1d6e5692ab8970 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 1 Jun 2013 23:39:40 -0700 Subject: sched_clock: Make ARM's sched_clock generic for all architectures Nothing about the sched_clock implementation in the ARM port is specific to the architecture. Generalize the code so that other architectures can use it by selecting GENERIC_SCHED_CLOCK. Signed-off-by: Stephen Boyd [jstultz: Merge minor collisions with other patches in my tree] Signed-off-by: John Stultz --- arch/arm/Kconfig | 1 + arch/arm/common/timer-sp.c | 2 +- arch/arm/include/asm/sched_clock.h | 16 --- arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/arch_timer.c | 2 +- arch/arm/kernel/sched_clock.c | 216 ------------------------------ arch/arm/kernel/time.c | 4 +- arch/arm/mach-davinci/time.c | 2 +- arch/arm/mach-imx/time.c | 2 +- arch/arm/mach-integrator/integrator_ap.c | 2 +- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-mmp/time.c | 2 +- arch/arm/mach-msm/timer.c | 2 +- arch/arm/mach-omap1/time.c | 2 +- arch/arm/mach-omap2/timer.c | 2 +- arch/arm/mach-pxa/time.c | 2 +- arch/arm/mach-sa1100/time.c | 2 +- arch/arm/mach-u300/timer.c | 2 +- arch/arm/plat-iop/time.c | 2 +- arch/arm/plat-omap/counter_32k.c | 2 +- arch/arm/plat-orion/time.c | 2 +- arch/arm/plat-samsung/samsung-time.c | 2 +- arch/arm/plat-versatile/sched-clock.c | 2 +- drivers/clocksource/bcm2835_timer.c | 2 +- drivers/clocksource/clksrc-dbx500-prcmu.c | 3 +- drivers/clocksource/dw_apb_timer_of.c | 3 +- drivers/clocksource/mxs_timer.c | 2 +- drivers/clocksource/nomadik-mtu.c | 2 +- drivers/clocksource/samsung_pwm_timer.c | 2 +- drivers/clocksource/tegra20_timer.c | 2 +- drivers/clocksource/time-armada-370-xp.c | 2 +- drivers/clocksource/timer-marco.c | 2 +- drivers/clocksource/timer-prima2.c | 2 +- include/linux/sched_clock.h | 21 +++ init/Kconfig | 3 + init/main.c | 2 + kernel/time/Makefile | 1 + kernel/time/sched_clock.c | 215 +++++++++++++++++++++++++++++ 38 files changed, 273 insertions(+), 266 deletions(-) delete mode 100644 arch/arm/include/asm/sched_clock.h delete mode 100644 arch/arm/kernel/sched_clock.c create mode 100644 include/linux/sched_clock.h create mode 100644 kernel/time/sched_clock.c (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 49d993cee512..53d3a356f61f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_STRNCPY_FROM_USER diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index ddc740769601..023ee63827a2 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -28,8 +28,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h deleted file mode 100644 index 3d520ddca61b..000000000000 --- a/arch/arm/include/asm/sched_clock.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * sched_clock.h: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef ASM_SCHED_CLOCK -#define ASM_SCHED_CLOCK - -extern void sched_clock_postinit(void); -extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate); - -extern unsigned long long (*sched_clock_func)(void); - -#endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..97cb0576d07c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -16,7 +16,7 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o return_address.o sched_clock.o \ + process.o ptrace.o return_address.o \ setup.o signal.o stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 59dcdced6e30..221f07b11ccb 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -11,9 +11,9 @@ #include #include #include +#include #include -#include #include diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c deleted file mode 100644 index a781c59b93c0..000000000000 --- a/arch/arm/kernel/sched_clock.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * sched_clock.c: support for extending counters to full 64-bit ns counter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -struct clock_data { - u64 epoch_ns; - u32 epoch_cyc; - u32 epoch_cyc_copy; - unsigned long rate; - u32 mult; - u32 shift; - bool suspended; -}; - -static void sched_clock_poll(unsigned long wrap_ticks); -static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); -static int irqtime = -1; - -core_param(irqtime, irqtime, int, 0400); - -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u32 __read_mostly sched_clock_mask = 0xffffffff; - -static u32 notrace jiffy_sched_clock_read(void) -{ - return (u32)(jiffies - INITIAL_JIFFIES); -} - -static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; - -static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) -{ - return (cyc * mult) >> shift; -} - -static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) -{ - u64 epoch_ns; - u32 epoch_cyc; - - /* - * Load the epoch_cyc and epoch_ns atomically. We do this by - * ensuring that we always write epoch_cyc, epoch_ns and - * epoch_cyc_copy in strict order, and read them in strict order. - * If epoch_cyc and epoch_cyc_copy are not equal, then we're in - * the middle of an update, and we should repeat the load. - */ - do { - epoch_cyc = cd.epoch_cyc; - smp_rmb(); - epoch_ns = cd.epoch_ns; - smp_rmb(); - } while (epoch_cyc != cd.epoch_cyc_copy); - - return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift); -} - -/* - * Atomically update the sched_clock epoch. - */ -static void notrace update_sched_clock(void) -{ - unsigned long flags; - u32 cyc; - u64 ns; - - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); - /* - * Write epoch_cyc and epoch_ns in a way that the update is - * detectable in cyc_to_fixed_sched_clock(). - */ - raw_local_irq_save(flags); - cd.epoch_cyc_copy = cyc; - smp_wmb(); - cd.epoch_ns = ns; - smp_wmb(); - cd.epoch_cyc = cyc; - raw_local_irq_restore(flags); -} - -static void sched_clock_poll(unsigned long wrap_ticks) -{ - mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); - update_sched_clock(); -} - -void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) -{ - unsigned long r, w; - u64 res, wrap; - char r_unit; - - if (cd.rate > rate) - return; - - BUG_ON(bits > 32); - WARN_ON(!irqs_disabled()); - read_sched_clock = read; - sched_clock_mask = (1 << bits) - 1; - cd.rate = rate; - - /* calculate the mult/shift to convert counter ticks to ns. */ - clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); - - r = rate; - if (r >= 4000000) { - r /= 1000000; - r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate how many ns until we wrap */ - wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); - do_div(wrap, NSEC_PER_MSEC); - w = wrap; - - /* calculate the ns resolution of this counter */ - res = cyc_to_ns(1ULL, cd.mult, cd.shift); - pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", - bits, r, r_unit, res, w); - - /* - * Start the timer to keep sched_clock() properly updated and - * sets the initial epoch. - */ - sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); - update_sched_clock(); - - /* - * Ensure that sched_clock() starts off at 0ns - */ - cd.epoch_ns = 0; - - /* Enable IRQ time accounting if we have a fast enough sched_clock */ - if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) - enable_sched_clock_irqtime(); - - pr_debug("Registered %pF as sched_clock source\n", read); -} - -static unsigned long long notrace sched_clock_32(void) -{ - u32 cyc = read_sched_clock(); - return cyc_to_sched_clock(cyc, sched_clock_mask); -} - -unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; - -unsigned long long notrace sched_clock(void) -{ - if (cd.suspended) - return cd.epoch_ns; - - return sched_clock_func(); -} - -void __init sched_clock_postinit(void) -{ - /* - * If no sched_clock function has been provided at that point, - * make it the final one one. - */ - if (read_sched_clock == jiffy_sched_clock_read) - setup_sched_clock(jiffy_sched_clock_read, 32, HZ); - - sched_clock_poll(sched_clock_timer.data); -} - -static int sched_clock_suspend(void) -{ - sched_clock_poll(sched_clock_timer.data); - cd.suspended = true; - return 0; -} - -static void sched_clock_resume(void) -{ - cd.epoch_cyc = read_sched_clock(); - cd.epoch_cyc_copy = cd.epoch_cyc; - cd.suspended = false; -} - -static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, -}; - -static int __init sched_clock_syscore_init(void) -{ - register_syscore_ops(&sched_clock_ops); - return 0; -} -device_initcall(sched_clock_syscore_init); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index abff4e9aaee0..98aee3258398 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -120,6 +120,4 @@ void __init time_init(void) machine_desc->init_time(); else clocksource_of_init(); - - sched_clock_postinit(); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index bad361ec1666..7a55b5c95971 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -18,8 +18,8 @@ #include #include #include +#include -#include #include #include diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index fea91313678b..cd46529e9eaa 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -26,8 +26,8 @@ #include #include #include +#include -#include #include #include "common.h" diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index b23c8e4f28e8..aa4346227c41 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -41,6 +41,7 @@ #include #include #include +#include #include